diff --git a/oletools/olevba.py b/oletools/olevba.py index 494a47c..5ef162f 100755 --- a/oletools/olevba.py +++ b/oletools/olevba.py @@ -76,7 +76,7 @@ https://github.com/unixfreak0037/officeparser # CHANGELOG: # 2014-08-05 v0.01 PL: - first version based on officeparser code # 2014-08-14 v0.02 PL: - fixed bugs in code, added license from officeparser -# 2014-08-15 PL: - fixed incorrect value check in PROJECTHELPFILEPATH Record +# 2014-08-15 PL: - fixed incorrect value check in projecthelpfilepath Record # 2014-08-15 v0.03 PL: - refactored extract_macros to support OpenXML formats # and to find the VBA project root anywhere in the file # 2014-11-29 v0.04 PL: - use olefile instead of OleFileIO_PL @@ -169,6 +169,9 @@ https://github.com/unixfreak0037/officeparser # 2016-04-19 v0.46 PL: - new option --deobf instead of --no-deobfuscate # - updated suspicious keywords # 2016-05-04 v0.47 PL: - look for VBA code in any stream including orphans +# 2016-04-28 CH: - return an exit code depending on the results +# - improved error and exception handling +# - improved JSON output __version__ = '0.47' @@ -212,10 +215,8 @@ import math import zipfile import re import optparse -import os.path import binascii import base64 -import traceback import zlib import email # for MHTML parsing import string # for printable @@ -240,8 +241,12 @@ except ImportError: import thirdparty.olefile as olefile from thirdparty.prettytable import prettytable -from thirdparty.xglob import xglob -from thirdparty.pyparsing.pyparsing import * +from thirdparty.xglob import xglob, PathNotFoundException +from thirdparty.pyparsing.pyparsing import \ + CaselessKeyword, CaselessLiteral, Combine, Forward, Literal, \ + Optional, QuotedString,Regex, Suppress, Word, WordStart, \ + alphanums, alphas, hexnums,nums, opAssoc, srange, \ + infixNotation # monkeypatch email to fix issue #32: # allow header lines without ":" @@ -291,8 +296,51 @@ def get_logger(name, level=logging.CRITICAL+1): log = get_logger('olevba') +#=== 
EXCEPTIONS ============================================================== + +class FileOpenError(Exception): + """ raised by VBA_Parser constructor if all open_... attempts failed + + probably means the file type is not supported + """ + + def __init__(self, filename): + super(FileOpenError, self).__init__( + 'Failed to open file %s ... probably not supported' % filename) + self.filename = filename + + +class ProcessingError(Exception): + """ raised by VBA_Parser.process_file* functions """ + + def __init__(self, filename, orig_exception): + super(ProcessingError, self).__init__( + 'Error processing file %s (%s)' % (filename, orig_exception)) + self.filename = filename + self.orig_exception = orig_exception + + +class MsoExtractionError(RuntimeError): + """ raised by mso_file_extract if parsing MSO/ActiveMIME data failed """ + + def __init__(self, msg): + super(MsoExtractionError, self).__init__(msg) + self.msg = msg + + #--- CONSTANTS ---------------------------------------------------------------- +# return codes +RETURN_OK = 0 +RETURN_WARNINGS = 1 # (reserved, not used yet) +RETURN_WRONG_ARGS = 2 # (fixed, built into optparse) +RETURN_FILE_NOT_FOUND = 3 +RETURN_XGLOB_ERR = 4 +RETURN_OPEN_ERROR = 5 +RETURN_PARSE_ERROR = 6 +RETURN_SEVERAL_ERRS = 7 +RETURN_UNEXPECTED = 8 + # URL and message to report issues: URL_OLEVBA_ISSUES = 'https://bitbucket.org/decalage/oletools/issues' MSG_OLEVBA_ISSUES = 'Please report this issue on %s' % URL_OLEVBA_ISSUES @@ -846,36 +894,37 @@ def mso_file_extract(data): :param data: bytes string, MSO/ActiveMime file content :return: bytes string, extracted data (uncompressed) - raise a RuntimeError if the data cannot be extracted + raise a MsoExtractionError if the data cannot be extracted """ # check the magic: assert is_mso_file(data) + + # In all the samples seen so far, Word always uses an offset of 0x32, + # and Excel 0x22A. But we read the offset from the header to be more + # generic. 
+ offsets = [0x32, 0x22A] + # First, attempt to get the compressed data offset from the header # According to my tests, it should be an unsigned 16 bits integer, # at offset 0x1E (little endian) + add 46: try: offset = struct.unpack_from(' 20: - print type(j), repr(j[:20]), '...(len {0})'.format(len(j)) - else: - print type(j), repr(j) - except TypeError: - print type(j), repr(j) - - def copytoken_help(decompressed_current, decompressed_chunk_start): """ compute bit masks to decode a CopyToken according to MS-OVBA 2.4.1.3.19.1 CopyToken Help @@ -1057,7 +1081,7 @@ def decompress_stream(compressed_container): copy_token = \ struct.unpack(" 128: - log.error("PROJECTNAME_SizeOfProjectName value not in range: {0}".format(PROJECTNAME_SizeOfProjectName)) - PROJECTNAME_ProjectName = dir_stream.read(PROJECTNAME_SizeOfProjectName) + projectname_id = struct.unpack(" 128: + log.error("PROJECTNAME_SizeOfProjectName value not in range: {0}".format(projectname_sizeof_projectname)) + projectname_projectname = dir_stream.read(projectname_sizeof_projectname) + unused = projectname_projectname # PROJECTDOCSTRING Record - PROJECTDOCSTRING_Id = struct.unpack(" 2000: + projectdocstring_id = struct.unpack(" 2000: log.error( - "PROJECTDOCSTRING_SizeOfDocString value not in range: {0}".format(PROJECTDOCSTRING_SizeOfDocString)) - PROJECTDOCSTRING_DocString = dir_stream.read(PROJECTDOCSTRING_SizeOfDocString) - PROJECTDOCSTRING_Reserved = struct.unpack(" 260: + projecthelpfilepath_id = struct.unpack(" 260: log.error( - "PROJECTHELPFILEPATH_SizeOfHelpFile1 value not in range: {0}".format(PROJECTHELPFILEPATH_SizeOfHelpFile1)) - PROJECTHELPFILEPATH_HelpFile1 = dir_stream.read(PROJECTHELPFILEPATH_SizeOfHelpFile1) - PROJECTHELPFILEPATH_Reserved = struct.unpack(" 1015: + projectconstants_id = struct.unpack(" 1015: log.error( - "PROJECTCONSTANTS_SizeOfConstants value not in range: {0}".format(PROJECTCONSTANTS_SizeOfConstants)) - PROJECTCONSTANTS_Constants = 
dir_stream.read(PROJECTCONSTANTS_SizeOfConstants) - PROJECTCONSTANTS_Reserved = struct.unpack(" 0: code_data = decompress_stream(code_data) # case-insensitive search in the code_modules dict to find the file extension: - filext = code_modules.get(MODULENAME_ModuleName.lower(), 'bin') - filename = '{0}.{1}'.format(MODULENAME_ModuleName, filext) + filext = code_modules.get(modulename_modulename.lower(), 'bin') + filename = '{0}.{1}'.format(modulename_modulename, filext) #TODO: also yield the codepage so that callers can decode it properly yield (code_path, filename, code_data) # print '-'*79 @@ -1460,7 +1528,8 @@ def _extract_vba(ole, vba_root, project_path, dir_path): # print '' log.debug('extracted file {0}'.format(filename)) else: - log.warning("module stream {0} has code data length 0".format(MODULESTREAMNAME_StreamName)) + log.warning("module stream {0} has code data length 0".format(modulestreamname_streamname)) + _ = unused return @@ -1616,12 +1685,9 @@ def detect_base64_strings(vba_code): decoded = base64.b64decode(value) results.append((value, decoded)) found.add(value) - except KeyboardInterrupt: - # do not ignore exceptions when the user presses Ctrl+C/Pause: - raise - except: + except (TypeError, ValueError) as exc: + log.debug('Failed to base64-decode (%s)' % exc) # if an exception occurs, it is likely not a base64-encoded string - pass return results @@ -1646,12 +1712,9 @@ def detect_dridex_strings(vba_code): decoded = DridexUrlDecode(value) results.append((value, decoded)) found.add(value) - except KeyboardInterrupt: - # do not ignore exceptions when the user presses Ctrl+C/Pause: - raise - except: + except Exception as exc: + log.debug('Failed to Dridex-decode (%s)' % exc) # if an exception occurs, it is likely not a dridex-encoded string - pass return results @@ -1701,16 +1764,17 @@ def json2ascii(json_obj, encoding='utf8', errors='replace'): elif isinstance(json_obj, (bool, int, float)): pass elif isinstance(json_obj, str): + # de-code and re-encode 
dencoded = json_obj.decode(encoding, errors).encode(encoding, errors) - if dencoded != str: - logging.info('json2ascii: replaced: {0} (len {1})' - .format(json_obj, len(json_obj))) - logging.info('json2ascii: with: {0} (len {1})' - .format(dencoded, len(dencoded))) + if dencoded != json_obj: + log.info('json2ascii: replaced: {0} (len {1})' + .format(json_obj, len(json_obj))) + log.info('json2ascii: with: {0} (len {1})' + .format(dencoded, len(dencoded))) return dencoded elif isinstance(json_obj, unicode): - logging.info('json2ascii: replaced: {0}' - .format(json_obj.encode(encoding, errors))) + log.info('json2ascii: replaced: {0}' + .format(json_obj.encode(encoding, errors))) # cannot put original into logger # print 'original: ' json_obj return json_obj.encode(encoding, errors) @@ -1721,11 +1785,50 @@ def json2ascii(json_obj, encoding='utf8', errors='replace'): for item in json_obj: item = json2ascii(item) else: - logging.debug('unexpected type in json2ascii: {0} -- leave as is' - .format(type(json_obj))) + log.debug('unexpected type in json2ascii: {0} -- leave as is' + .format(type(json_obj))) return json_obj +_have_printed_json_start = False + +def print_json(json_dict=None, _json_is_last=False, **json_parts): + """ line-wise print of json.dumps(json2ascii(..)) with options and indent+1 + + can use in two ways: + (1) print_json(some_dict) + (2) print_json(key1=value1, key2=value2, ...) 
+ + :param bool _json_is_last: set to True only for very last entry to complete + the top-level json-list + """ + global _have_printed_json_start + + if json_dict and json_parts: + raise ValueError('Invalid json argument: want either single dict or ' + 'key=value parts but got both)') + elif (json_dict is not None) and (not isinstance(json_dict, dict)): + raise ValueError('Invalid json argument: want either single dict or ' + 'key=value parts but got {} instead of dict)' + .format(type(json_dict))) + if json_parts: + json_dict = json_parts + + if not _have_printed_json_start: + print '[' + _have_printed_json_start = True + + lines = json.dumps(json2ascii(json_dict), check_circular=False, + indent=4, ensure_ascii=False).splitlines() + for line in lines[:-1]: + print ' {}'.format(line) + if _json_is_last: + print ' {}'.format(lines[-1]) # print last line without comma + print ']' + else: + print ' {},'.format(lines[-1]) # print last line with comma + + class VBA_Scanner(object): """ Class to scan the source code of a VBA module to find obfuscated strings, @@ -1924,6 +2027,8 @@ class VBA_Parser(object): :param container: str, path and filename of container if the file is within a zip archive, None otherwise. + + raises a FileOpenError if all attempts to interpret the data header failed """ #TODO: filename should only be a string, data should be used for the file-like object #TODO: filename should be mandatory, optional data is a string or file-like object @@ -2000,8 +2105,8 @@ class VBA_Parser(object): if self.type is None: # At this stage, could not match a known format: msg = '%s is not a supported file type, cannot extract VBA Macros.' 
% self.filename - log.error(msg) - raise TypeError(msg) + log.info(msg) + raise FileOpenError(msg) def open_ole(self, _file): """ @@ -2016,13 +2121,10 @@ class VBA_Parser(object): # TODO: raise TypeError if this is a Powerpoint 97 file, since VBA macros cannot be detected yet # set type only if parsing succeeds self.type = TYPE_OLE - except KeyboardInterrupt: - # do not ignore exceptions when the user presses Ctrl+C/Pause: - raise - except: + except (IOError, TypeError, ValueError) as exc: # TODO: handle OLE parsing exceptions - log.exception('Failed OLE parsing for file %r' % self.filename) - pass + log.info('Failed OLE parsing for file %r (%s)' % (self.filename, exc)) + log.debug('Trace:', exc_info=True) def open_openxml(self, _file): @@ -2048,22 +2150,17 @@ class VBA_Parser(object): ole_data = z.open(subfile).read() try: self.ole_subfiles.append(VBA_Parser(filename=subfile, data=ole_data)) - except KeyboardInterrupt: - # do not ignore exceptions when the user presses Ctrl+C/Pause: - raise - except: - log.debug('%s is not a valid OLE file' % subfile) + except FileOpenError as exc: + log.info('%s is not a valid OLE file (%s)' % (subfile, exc)) continue z.close() # set type only if parsing succeeds self.type = TYPE_OpenXML - except KeyboardInterrupt: - # do not ignore exceptions when the user presses Ctrl+C/Pause: - raise - except: + except (RuntimeError, zipfile.BadZipfile, zipfile.LargeZipFile, IOError) as exc: # TODO: handle parsing exceptions - log.exception('Failed Zip/OpenXML parsing for file %r' % self.filename) - pass + log.info('Failed Zip/OpenXML parsing for file %r (%s)' + % (self.filename, exc)) + log.debug('Trace:', exc_info=True) def open_word2003xml(self, data): """ @@ -2087,25 +2184,25 @@ class VBA_Parser(object): if is_mso_file(mso_data): # decompress the zlib data stored in the MSO file, which is the OLE container: # TODO: handle different offsets => separate function - ole_data = mso_file_extract(mso_data) try: + ole_data = 
mso_file_extract(mso_data) self.ole_subfiles.append(VBA_Parser(filename=fname, data=ole_data)) - except KeyboardInterrupt: - # do not ignore exceptions when the user presses Ctrl+C/Pause: - raise - except: - log.error('%s does not contain a valid OLE file' % fname) + except MsoExtractionError: + log.info('Failed decompressing an MSO container in %r - %s' + % (fname, MSG_OLEVBA_ISSUES)) + log.debug('Trace:', exc_info=True) + except FileOpenError as exc: + log.debug('%s is not a valid OLE sub file (%s)' % (fname, exc)) else: - log.error('%s is not a valid MSO file' % fname) + log.info('%s is not a valid MSO file' % fname) # set type only if parsing succeeds self.type = TYPE_Word2003_XML - except KeyboardInterrupt: - # do not ignore exceptions when the user presses Ctrl+C/Pause: - raise - except: + except Exception as exc: # TODO: differentiate exceptions for each parsing stage - log.exception('Failed XML parsing for file %r' % self.filename) - pass + # (but ET is different libs, no good exception description in API) + # found: XMLSyntaxError + log.info('Failed XML parsing for file %r (%s)' % (self.filename, exc)) + log.debug('Trace:', exc_info=True) def open_mht(self, data): """ @@ -2148,40 +2245,30 @@ class VBA_Parser(object): log.debug('Found ActiveMime header, decompressing MSO container') try: ole_data = mso_file_extract(part_data) - try: - # TODO: check if it is actually an OLE file - # TODO: get the MSO filename from content_location? - self.ole_subfiles.append(VBA_Parser(filename=fname, data=ole_data)) - except KeyboardInterrupt: - # do not ignore exceptions when the user presses Ctrl+C/Pause: - raise - except: - log.debug('%s does not contain a valid OLE file' % fname) - except KeyboardInterrupt: - # do not ignore exceptions when the user presses Ctrl+C/Pause: - raise - except: - log.exception('Failed decompressing an MSO container in %r - %s' + + # TODO: check if it is actually an OLE file + # TODO: get the MSO filename from content_location? 
+ self.ole_subfiles.append(VBA_Parser(filename=fname, data=ole_data)) + except MsoExtractionError: + log.info('Failed decompressing an MSO container in %r - %s' % (fname, MSG_OLEVBA_ISSUES)) + log.debug('Trace:', exc_info=True) # TODO: bug here - need to split in smaller functions/classes? + except FileOpenError as exc: + log.debug('%s does not contain a valid OLE file (%s)' + % (fname, exc)) else: + log.debug('type(part_data) = %s' % type(part_data)) try: - log.debug('type(part_data) = %s' % type(part_data)) log.debug('part_data[0:20] = %r' % part_data[0:20]) - except KeyboardInterrupt: - # do not ignore exceptions when the user presses Ctrl+C/Pause: - raise - except: - pass + except TypeError as err: + log.debug('part_data has no __getitem__') # set type only if parsing succeeds self.type = TYPE_MHTML - except KeyboardInterrupt: - # do not ignore exceptions when the user presses Ctrl+C/Pause: - raise - except: - log.exception('Failed MIME parsing for file %r - %s' - % (self.filename, MSG_OLEVBA_ISSUES)) - pass + except Exception: + log.info('Failed MIME parsing for file %r - %s' + % (self.filename, MSG_OLEVBA_ISSUES)) + log.debug('Trace:', exc_info=True) def open_text(self, data): @@ -2191,19 +2278,11 @@ class VBA_Parser(object): :return: nothing """ log.info('Opening text file %s' % self.filename) - try: - # directly store the source code: - self.vba_code_all_modules = data - self.contains_macros = True - # set type only if parsing succeeds - self.type = TYPE_TEXT - except KeyboardInterrupt: - # do not ignore exceptions when the user presses Ctrl+C/Pause: - raise - except: - log.exception('Failed text parsing for file %r - %s' - % (self.filename, MSG_OLEVBA_ISSUES)) - pass + # directly store the source code: + self.vba_code_all_modules = data + self.contains_macros = True + # set type only if parsing succeeds + self.type = TYPE_TEXT def find_vba_projects(self): @@ -2247,6 +2326,15 @@ class VBA_Parser(object): # - The root/VBA storage MUST contain a _VBA_PROJECT 
stream and a dir stream # - all names are case-insensitive + def check_vba_stream(ole, vba_root, stream_path): + full_path = vba_root + stream_path + if ole.exists(full_path) and ole.get_type(full_path) == olefile.STGTY_STREAM: + log.debug('Found %s stream: %s' % (stream_path, full_path)) + return full_path + else: + log.debug('Missing %s stream, this is not a valid VBA project structure' % stream_path) + return False + # start with an empty list: self.vba_projects = [] # Look for any storage containing those storage/streams: @@ -2263,15 +2351,6 @@ class VBA_Parser(object): vba_root += '/' log.debug('Checking vba_root="%s"' % vba_root) - def check_vba_stream(ole, vba_root, stream_path): - full_path = vba_root + stream_path - if ole.exists(full_path) and ole.get_type(full_path) == olefile.STGTY_STREAM: - log.debug('Found %s stream: %s' % (stream_path, full_path)) - return full_path - else: - log.debug('Missing %s stream, this is not a valid VBA project structure' % stream_path) - return False - # Check if the VBA root storage also contains a PROJECT stream: project_path = check_vba_stream(ole, vba_root, 'PROJECT') if not project_path: continue @@ -2436,10 +2515,10 @@ class VBA_Parser(object): # variable to merge source code from all modules: if self.vba_code_all_modules is None: self.vba_code_all_modules = '' - for (subfilename, stream_path, vba_filename, vba_code) in self.extract_all_macros(): + for (_, _, _, vba_code) in self.extract_all_macros(): #TODO: filter code? (each module) self.vba_code_all_modules += vba_code + '\n' - for (subfilename, form_path, form_string) in self.extract_form_strings(): + for (_, _, form_string) in self.extract_form_strings(): self.vba_code_all_modules += form_string + '\n' # Analyze the whole code at once: scanner = VBA_Scanner(self.vba_code_all_modules) @@ -2587,8 +2666,7 @@ class VBA_Parser_CLI(VBA_Parser): def __init__(self, filename, data=None, container=None): """ Constructor for VBA_Parser_CLI. 
- Calls __init__ from VBA_Parser, but handles the TypeError exception - when the file type is not supported. + Calls __init__ from VBA_Parser :param filename: filename or path of file to parse, or file-like object @@ -2599,11 +2677,7 @@ class VBA_Parser_CLI(VBA_Parser): :param container: str, path and filename of container if the file is within a zip archive, None otherwise. """ - try: - VBA_Parser.__init__(self, filename, data=data, container=container) - except TypeError: - # in that case, self.type=None - pass + super(VBA_Parser_CLI, self).__init__(filename, data=data, container=container) def print_analysis(self, show_decoded_strings=False, deobfuscate=False): @@ -2653,7 +2727,7 @@ class VBA_Parser_CLI(VBA_Parser): for kw_type, keyword, description in self.analyze_macros(show_decoded_strings)] def process_file(self, show_decoded_strings=False, - display_code=True, global_analysis=True, hide_attributes=True, + display_code=True, hide_attributes=True, vba_code_only=False, show_deobfuscated_code=False, deobfuscate=False): """ @@ -2699,19 +2773,12 @@ class VBA_Parser_CLI(VBA_Parser): print '(empty macro)' else: print vba_code_filtered - if not global_analysis and not vba_code_only: - #TODO: remove this option - raise NotImplementedError - print '- ' * 39 - print 'ANALYSIS:' - # analyse each module's code, filtered to avoid false positives: - self.print_analysis(show_decoded_strings, deobfuscate) for (subfilename, stream_path, form_string) in self.extract_form_strings(): print '-' * 79 print 'VBA FORM STRING IN %r - OLE stream: %r' % (subfilename, stream_path) print '- ' * 39 print form_string - if global_analysis and not vba_code_only: + if not vba_code_only: # analyse the code from all modules at once: self.print_analysis(show_decoded_strings, deobfuscate) if show_deobfuscated_code: @@ -2719,20 +2786,16 @@ class VBA_Parser_CLI(VBA_Parser): print self.reveal() else: print 'No VBA macros found.' 
- except KeyboardInterrupt: - # do not ignore exceptions when the user presses Ctrl+C/Pause: - raise - except: #TypeError: - #raise - #TODO: print more info if debug mode - #print sys.exc_value - # display the exception with full stack trace for debugging, but do not stop: - traceback.print_exc() + except Exception as exc: + # display the exception with full stack trace for debugging + log.info('Error processing file %s (%s)' % (self.filename, exc)) + log.debug('Traceback:', exc_info=True) + raise ProcessingError(self.filename, exc) print '' def process_file_json(self, show_decoded_strings=False, - display_code=True, global_analysis=True, hide_attributes=True, + display_code=True, hide_attributes=True, vba_code_only=False, show_deobfuscated_code=False): """ Process a single file @@ -2781,27 +2844,19 @@ class VBA_Parser_CLI(VBA_Parser): curr_macro['ole_stream'] = stream_path if display_code: curr_macro['code'] = vba_code_filtered.strip() - if not global_analysis and not vba_code_only: - # analyse each module's code, filtered to avoid false positives: - #TODO: remove this option - curr_macro['analysis'] = self.print_analysis_json(show_decoded_strings) macros.append(curr_macro) - if global_analysis and not vba_code_only: + if not vba_code_only: # analyse the code from all modules at once: result['analysis'] = self.print_analysis_json(show_decoded_strings) if show_deobfuscated_code: result['code_deobfuscated'] = self.reveal() result['macros'] = macros result['json_conversion_successful'] = True - except KeyboardInterrupt: - # do not ignore exceptions when the user presses Ctrl+C/Pause: - raise - except: #TypeError: - #raise - #TODO: print more info if debug mode - #print sys.exc_value - # display the exception with full stack trace for debugging, but do not stop: - traceback.print_exc() + except Exception as exc: + # display the exception with full stack trace for debugging + log.info('Error processing file %s (%s)' % (self.filename, exc)) + log.debug('Traceback:', 
exc_info=True) + raise ProcessingError(self.filename, exc) return result @@ -2811,57 +2866,46 @@ class VBA_Parser_CLI(VBA_Parser): Process a file in triage mode, showing only summary results on one line. """ #TODO: replace print by writing to a provided output file (sys.stdout by default) - message = '' try: - if self.type is not None: - #TODO: handle olefile errors, when an OLE file is malformed - if self.detect_vba_macros(): - # print a waiting message only if the output is not redirected to a file: - if sys.stdout.isatty(): - print 'Analysis...\r', - sys.stdout.flush() - self.analyze_macros(show_decoded_strings=show_decoded_strings, - deobfuscate=deobfuscate) - flags = TYPE2TAG[self.type] - macros = autoexec = suspicious = iocs = hexstrings = base64obf = dridex = vba_obf = '-' - if self.contains_macros: macros = 'M' - if self.nb_autoexec: autoexec = 'A' - if self.nb_suspicious: suspicious = 'S' - if self.nb_iocs: iocs = 'I' - if self.nb_hexstrings: hexstrings = 'H' - if self.nb_base64strings: base64obf = 'B' - if self.nb_dridexstrings: dridex = 'D' - if self.nb_vbastrings: vba_obf = 'V' - flags += '%s%s%s%s%s%s%s%s' % (macros, autoexec, suspicious, iocs, hexstrings, - base64obf, dridex, vba_obf) - # old table display: - # macros = autoexec = suspicious = iocs = hexstrings = 'no' - # if nb_macros: macros = 'YES:%d' % nb_macros - # if nb_autoexec: autoexec = 'YES:%d' % nb_autoexec - # if nb_suspicious: suspicious = 'YES:%d' % nb_suspicious - # if nb_iocs: iocs = 'YES:%d' % nb_iocs - # if nb_hexstrings: hexstrings = 'YES:%d' % nb_hexstrings - # # 2nd line = info - # print '%-8s %-7s %-7s %-7s %-7s %-7s' % (self.type, macros, autoexec, suspicious, iocs, hexstrings) - else: - # self.type==None - # file type not OLE nor OpenXML - flags = '?' 
- message = 'File format not supported' - except KeyboardInterrupt: - # do not ignore exceptions when the user presses Ctrl+C/Pause: - raise - except: - # another error occurred - #raise - #TODO: print more info if debug mode - #TODO: distinguish real errors from incorrect file types - flags = '!ERROR' - message = sys.exc_value - line = '%-12s %s' % (flags, self.filename) - if message: - line += ' - %s' % message - print line + #TODO: handle olefile errors, when an OLE file is malformed + if self.detect_vba_macros(): + # print a waiting message only if the output is not redirected to a file: + if sys.stdout.isatty(): + print 'Analysis...\r', + sys.stdout.flush() + self.analyze_macros(show_decoded_strings=show_decoded_strings, + deobfuscate=deobfuscate) + flags = TYPE2TAG[self.type] + macros = autoexec = suspicious = iocs = hexstrings = base64obf = dridex = vba_obf = '-' + if self.contains_macros: macros = 'M' + if self.nb_autoexec: autoexec = 'A' + if self.nb_suspicious: suspicious = 'S' + if self.nb_iocs: iocs = 'I' + if self.nb_hexstrings: hexstrings = 'H' + if self.nb_base64strings: base64obf = 'B' + if self.nb_dridexstrings: dridex = 'D' + if self.nb_vbastrings: vba_obf = 'V' + flags += '%s%s%s%s%s%s%s%s' % (macros, autoexec, suspicious, iocs, hexstrings, + base64obf, dridex, vba_obf) + + line = '%-12s %s' % (flags, self.filename) + print line + + # old table display: + # macros = autoexec = suspicious = iocs = hexstrings = 'no' + # if nb_macros: macros = 'YES:%d' % nb_macros + # if nb_autoexec: autoexec = 'YES:%d' % nb_autoexec + # if nb_suspicious: suspicious = 'YES:%d' % nb_suspicious + # if nb_iocs: iocs = 'YES:%d' % nb_iocs + # if nb_hexstrings: hexstrings = 'YES:%d' % nb_hexstrings + # # 2nd line = info + # print '%-8s %-7s %-7s %-7s %-7s %-7s' % (self.type, macros, autoexec, suspicious, iocs, hexstrings) + except Exception as exc: + # display the exception with full stack trace for debugging only + log.debug('Error processing file %s (%s)' % 
(self.filename, exc), + exc_info=True) + raise ProcessingError(self.filename, exc) + # t = prettytable.PrettyTable(('filename', 'type', 'macros', 'autoexec', 'suspicious', 'ioc', 'hexstrings'), # header=False, border=False) @@ -2883,7 +2927,6 @@ def main(): """ Main function, called when olevba is run from the command line """ - global log DEFAULT_LOG_LEVEL = "warning" # Default log level LOG_LEVELS = { 'debug': logging.DEBUG, @@ -2939,13 +2982,14 @@ def main(): if len(args) == 0: print __doc__ parser.print_help() - sys.exit() + sys.exit(RETURN_WRONG_ARGS) # provide info about tool and its version if options.output_mode == 'json': - json_results = [dict(script_name='olevba', version=__version__, - url='http://decalage.info/python/oletools', - type='MetaInformation'), ] + # prints opening [ + print_json(script_name='olevba', version=__version__, + url='http://decalage.info/python/oletools', + type='MetaInformation') else: print 'olevba %s - http://decalage.info/python/oletools' % __version__ @@ -2971,65 +3015,120 @@ def main(): count = 0 container = filename = data = None vba_parser = None - for container, filename, data in xglob.iter_files(args, recursive=options.recursive, - zip_password=options.zip_password, zip_fname=options.zip_fname): - # ignore directory names stored in zip files: - if container and filename.endswith('/'): - continue - # Open the file - vba_parser = VBA_Parser_CLI(filename, data=data, container=container) - if options.output_mode == 'detailed': - # fully detailed output - vba_parser.process_file(show_decoded_strings=options.show_decoded_strings, - display_code=options.display_code, global_analysis=True, #options.global_analysis, - hide_attributes=options.hide_attributes, vba_code_only=options.vba_code_only, - show_deobfuscated_code=options.show_deobfuscated_code, - deobfuscate=options.deobfuscate) - elif options.output_mode in ('triage', 'unspecified'): - # print container name when it changes: - if container != previous_container: - if 
container is not None: - print '\nFiles in %s:' % container - previous_container = container - # summarized output for triage: - vba_parser.process_file_triage(show_decoded_strings=options.show_decoded_strings, - deobfuscate=options.deobfuscate) - elif options.output_mode == 'json': - json_results.append( - vba_parser.process_file_json(show_decoded_strings=options.show_decoded_strings, - display_code=options.display_code, global_analysis=True, #options.global_analysis, - hide_attributes=options.hide_attributes, vba_code_only=options.vba_code_only, - show_deobfuscated_code=options.show_deobfuscated_code)) - else: # (should be impossible) - raise ValueError('unexpected output mode: "{0}"!'.format(options.output_mode)) - count += 1 - if options.output_mode == 'triage': - print '\n(Flags: OpX=OpenXML, XML=Word2003XML, MHT=MHTML, TXT=Text, M=Macros, ' \ - 'A=Auto-executable, S=Suspicious keywords, I=IOCs, H=Hex strings, ' \ - 'B=Base64 strings, D=Dridex strings, V=VBA strings, ?=Unknown)\n' - - if count == 1 and options.output_mode == 'unspecified': - # if options -t, -d and -j were not specified and it's a single file, print details: - vba_parser.process_file(show_decoded_strings=options.show_decoded_strings, - display_code=options.display_code, global_analysis=True, #options.global_analysis, - hide_attributes=options.hide_attributes, vba_code_only=options.vba_code_only, - show_deobfuscated_code=options.show_deobfuscated_code, - deobfuscate=options.deobfuscate) - - if options.output_mode == 'json': - json_options = dict(check_circular=False, indent=4, ensure_ascii=False) - - # json.dump[s] cannot deal with unicode objects that are not properly - # encoded --> encode in own function: - json_results = json2ascii(json_results) - #print_json(json_results) - - # if False: # options.outfile: # (option currently commented out) - # with open(outfile, 'w') as write_handle: - # json.dump(write_handle, **json_options) - # else: - print json.dumps(json_results, **json_options) + 
return_code = RETURN_OK + try: + for container, filename, data in xglob.iter_files(args, recursive=options.recursive, + zip_password=options.zip_password, zip_fname=options.zip_fname): + # ignore directory names stored in zip files: + if container and filename.endswith('/'): + continue + + # handle errors from xglob + if isinstance(data, Exception): + if isinstance(data, PathNotFoundException): + if options.output_mode in ('triage', 'unspecified'): + print '%-12s %s - File not found' % ('?', filename) + elif options.output_mode != 'json': + log.error('Given path %r does not exist!' % filename) + return_code = RETURN_FILE_NOT_FOUND if return_code == 0 \ + else RETURN_SEVERAL_ERRS + else: + if options.output_mode in ('triage', 'unspecified'): + print '%-12s %s - Failed to read from zip file %s' % ('?', filename, container) + elif options.output_mode != 'json': + log.error('Exception opening/reading %r from zip file %r: %s' + % (filename, container, data)) + return_code = RETURN_XGLOB_ERR if return_code == 0 \ + else RETURN_SEVERAL_ERRS + if options.output_mode == 'json': + print_json(file=filename, type='error', + error=type(data).__name__, message=str(data)) + continue + try: + # Open the file + vba_parser = VBA_Parser_CLI(filename, data=data, container=container) + + if options.output_mode == 'detailed': + # fully detailed output + vba_parser.process_file(show_decoded_strings=options.show_decoded_strings, + display_code=options.display_code, + hide_attributes=options.hide_attributes, vba_code_only=options.vba_code_only, + show_deobfuscated_code=options.show_deobfuscated_code, + deobfuscate=options.deobfuscate) + elif options.output_mode in ('triage', 'unspecified'): + # print container name when it changes: + if container != previous_container: + if container is not None: + print '\nFiles in %s:' % container + previous_container = container + # summarized output for triage: + vba_parser.process_file_triage(show_decoded_strings=options.show_decoded_strings, + 
deobfuscate=options.deobfuscate) + elif options.output_mode == 'json': + print_json( + vba_parser.process_file_json(show_decoded_strings=options.show_decoded_strings, + display_code=options.display_code, + hide_attributes=options.hide_attributes, vba_code_only=options.vba_code_only, + show_deobfuscated_code=options.show_deobfuscated_code)) + else: # (should be impossible) + raise ValueError('unexpected output mode: "{0}"!'.format(options.output_mode)) + count += 1 + + except FileOpenError as exc: + if options.output_mode in ('triage', 'unspecified'): + print '%-12s %s - File format not supported' % ('?', filename) + elif options.output_mode == 'json': + print_json(file=filename, type='error', + error=type(exc).__name__, message=str(exc)) + else: + log.exception('Failed to open %s -- probably not supported!' % filename) + return_code = RETURN_OPEN_ERROR if return_code == 0 \ + else RETURN_SEVERAL_ERRS + except ProcessingError as exc: + if options.output_mode in ('triage', 'unspecified'): + print '%-12s %s - %s' % ('!ERROR', filename, exc.orig_exception) + elif options.output_mode == 'json': + print_json(file=filename, type='error', + error=type(exc).__name__, + message=str(exc.orig_exception)) + else: + log.exception('Error processing file %s (%s)!' 
+ % (filename, exc.orig_exception)) + return_code = RETURN_PARSE_ERROR if return_code == 0 \ + else RETURN_SEVERAL_ERRS + finally: + if vba_parser is not None: + vba_parser.close() + + if options.output_mode == 'triage': + print '\n(Flags: OpX=OpenXML, XML=Word2003XML, MHT=MHTML, TXT=Text, M=Macros, ' \ + 'A=Auto-executable, S=Suspicious keywords, I=IOCs, H=Hex strings, ' \ + 'B=Base64 strings, D=Dridex strings, V=VBA strings, ?=Unknown)\n' + + if count == 1 and options.output_mode == 'unspecified': + # if options -t, -d and -j were not specified and it's a single file, print details: + vba_parser.process_file(show_decoded_strings=options.show_decoded_strings, + display_code=options.display_code, + hide_attributes=options.hide_attributes, vba_code_only=options.vba_code_only, + show_deobfuscated_code=options.show_deobfuscated_code, + deobfuscate=options.deobfuscate) + + if options.output_mode == 'json': + # print last json entry (a last one without a comma) and closing ] + print_json(type='MetaInformation', return_code=return_code, + n_processed=count, _json_is_last=True) + + except Exception as exc: + # some unexpected error, maybe some of the types caught in except clauses + # above were not sufficient. This is very bad, so log complete trace at exception level + # and do not care about output mode + log.exception('Unhandled exception in main: %s' % exc, exc_info=True) + return_code = RETURN_UNEXPECTED # even if there were others before -- this is more important + + # done. 
class PathNotFoundException(Exception):
    """ raised if given a fixed file/dir (not a glob) that does not exist

    :param path: the file specification that could not be found. It is
        embedded in the exception message and also kept on the instance as
        ``path`` so that callers can report it without parsing the message.
    """

    def __init__(self, path):
        # format with a one-element tuple so that a tuple-valued path cannot
        # be mistaken for a list of format arguments
        super(PathNotFoundException, self).__init__(
            'Given path does not exist: %r' % (path,))
        self.path = path
def is_glob(filespec):
    """ determine if given file specification is a single file name or a glob

    python's glob and fnmatch can only interpret ?, *, [list], and [ra-nge],
    (and combinations: hex_*_[A-Fabcdef0-9]).
    The special chars *?[-] can only be escaped using []
    --> file_name is not a glob
    --> file?name is a glob
    --> file* is a glob
    --> file[-._]name is a glob
    --> file[?]name is not a glob (matches literal "file?name")
    --> file[*]name is not a glob (matches literal "file*name")
    --> file[-]name is not a glob (matches literal "file-name")
    --> file-name is not a glob

    Brackets that fnmatch would treat as literal chars do not make a glob
    --> file[name is not a glob (matches literal "file[name")
    --> file]-[name is not a glob (the "]" comes before the "[")
    --> file[]name is not a glob (a "]" right after "[" does not close it)

    Python's glob also works with globs in directory-part of path
    --> dir-part of path is analyzed just like filename-part
    --> thirdparty/*/xglob.py is a (valid) glob

    The contents of a bracket expression are not validated any further
    (e.g. ranges), every properly closed [...] other than the five escapes
    listed above counts as a glob.

    :param filespec: str, file specification as given by the user
    :return: bool, True if filespec contains unescaped glob syntax
    """

    # bracket expressions that merely escape a single special char
    escaped_chars = ('*', '?', '[', ']', '-')

    length = len(filespec)
    pos = 0
    while pos < length:
        char = filespec[pos]
        pos += 1
        if char == '*' or char == '?':
            return True
        if char != '[':
            continue

        # Find the closing bracket the way fnmatch does: an optional "!"
        # and a "]" directly after the opening bracket belong to the
        # expression and cannot terminate it.
        close = pos
        if close < length and filespec[close] == '!':
            close += 1
        if close < length and filespec[close] == ']':
            close += 1
        close = filespec.find(']', close)

        if close < 0:
            # unterminated "[" is a literal char; keep scanning what follows
            continue
        if filespec[pos:close] in escaped_chars:
            # escaped special char, e.g. "[*]": skip the whole expression
            pos = close + 1
            continue
        return True

    return False