diff --git a/oletools/olevba.py b/oletools/olevba.py index ce0d971..0155ece 100644 --- a/oletools/olevba.py +++ b/oletools/olevba.py @@ -227,7 +227,7 @@ from __future__ import print_function # 2020-01-31 v0.56 KS: - added option --no-xlm, improved MHT detection # 2020-03-22 PL: - uses plugin_biff to display DCONN objects and their URL -__version__ = '0.56dev4' +__version__ = '0.56dev5' #------------------------------------------------------------------------------ # TODO: @@ -795,6 +795,8 @@ SUSPICIOUS_KEYWORDS = { 'DisableUnsafeLocationsInPV', 'blockcontentexecutionfrominternet'), 'May attempt to modify the VBA code (self-modification)': ('VBProject', 'VBComponents', 'CodeModule', 'AddFromString'), + 'May modify Excel 4 Macro formulas at runtime (XLM/XLF)': + ('FORMULA.FILL',), } # Suspicious Keywords to be searched for directly as regex, without escaping @@ -3249,13 +3251,21 @@ class VBA_Parser(object): data = self.ole_file.openstream(excel_stream).read() log.debug('Running BIFF plugin from oledump') try: - biff_plugin = cBIFF(name=[excel_stream], stream=data, options='-x') + # starting from plugin_biff 0.0.12, we use the CSV output (-c) instead of -x + # biff_plugin = cBIFF(name=[excel_stream], stream=data, options='-x') + # First let's get the list of boundsheets, and check if there are Excel 4 macros: + biff_plugin = cBIFF(name=[excel_stream], stream=data, options='-o BOUNDSHEET') self.xlm_macros = biff_plugin.Analyze() - if len(self.xlm_macros)>0: + if "Excel 4.0 macro sheet" in '\n'.join(self.xlm_macros): log.debug('Found XLM macros') + # get the list of labels, which may contain the "Auto_Open" trigger + biff_plugin = cBIFF(name=[excel_stream], stream=data, options='-o LABEL') + self.xlm_macros += biff_plugin.Analyze() + biff_plugin = cBIFF(name=[excel_stream], stream=data, options='-c -r LN') + self.xlm_macros += biff_plugin.Analyze() # we run plugin_biff again, this time to search DCONN objects and get their URLs, if any: # ref: https://inquest.net/blog/2020/03/18/Getting-Sneakier-Hidden-Sheets-Data-Connections-and-XLM-Macros - biff_plugin = cBIFF(name=[excel_stream], stream=data, options='-o 876 -s') + biff_plugin = cBIFF(name=[excel_stream], stream=data, options='-o DCONN -s') self.xlm_macros += biff_plugin.Analyze() return True except: diff --git a/oletools/thirdparty/oledump/plugin_biff.py b/oletools/thirdparty/oledump/plugin_biff.py index f907669..646aabf 100644 --- a/oletools/thirdparty/oledump/plugin_biff.py +++ b/oletools/thirdparty/oledump/plugin_biff.py @@ -1,11 +1,9 @@ #!/usr/bin/env python -from __future__ import print_function - __description__ = 'BIFF plugin for oledump.py' __author__ = 'Didier Stevens' -__version__ = '0.0.11' -__date__ = '2020/04/06' +__version__ = '0.0.12' +__date__ = '2020/05/18' # Slightly modified version by Philippe Lagadec to be imported into olevba @@ -38,6 +36,9 @@ History: d3c1627ca2775d98717eb1abf2b70aedf383845d87993c6b924f2f55d9d4d696 (ptgFunc) 1d48a42a0b06a087e966b860c8f293a9bf57da8d70f5f83c61242afc5b81eb4f (=SELECT($B$1:$1000:$1000:$B:$B,$B$1)) 2020/04/06: 0.0.11 Python 2 bugfixes; password protect record FILEPASS + 2020/05/16: 0.0.12 option -c + 2020/05/17: option -r + 2020/05/18: continue Todo: """ @@ -52,6 +53,9 @@ import binascii from .oledump_extract import * # end modifications +DEFAULT_SEPARATOR = ',' +QUOTE = '"' + def P23Decode(value): if sys.version_info[0] > 2: try: @@ -61,6 +65,24 @@ def P23Decode(value): else: return value +def ToString(value): + if isinstance(value, str): + return value + else: + return str(value) + +def Quote(value, separator, quote): + value = ToString(value) + if len(value) > 1 and value[0] == quote and value[-1] == quote: + return value + if separator in value or value == '': + return quote + value + quote + else: + return value + +def MakeCSVLine(row, separator, quote): + return separator.join([Quote(value, separator, quote) for value in row]) + def CombineHexASCII(hexDump, asciiDump, length): if hexDump == '': return '' @@ -146,9 +168,9 @@ def ParseArea(expression): def ParseLocRelU(expression): row = P23Ord(expression[0]) + P23Ord(expression[1]) * 0x100 column = P23Ord(expression[2]) + P23Ord(expression[3]) * 0x100 - rowRelative = False #P23Ord(expression[3]) & 0x0001 - colRelative = False #P23Ord(expression[3]) & 0x0002 - #column = column & 0xFFFC + rowRelative = False #column & 0x8000 + colRelative = False #column & 0x4000 + column = column & 0x3FFF if rowRelative: rowindicator = '~' else: @@ -162,12 +184,16 @@ def ParseLocRelU(expression): return 'R%s%dC%s%d' % (rowindicator, row, colindicator, column) #https://docs.microsoft.com/en-us/openspecs/office_file_formats/ms-xls/6e5eed10-5b77-43d6-8dd0-37345f8654ad -def ParseLoc(expression): +def ParseLoc(expression, cellrefformat, ignoreRelFlags=False): formatcodes = 'HH' formatsize = struct.calcsize(formatcodes) row, column = struct.unpack(formatcodes, expression[0:formatsize]) - rowRelative = column & 0x8000 - colRelative = column & 0x4000 + if ignoreRelFlags: + rowRelative = False + colRelative = False + else: + rowRelative = column & 0x8000 + colRelative = column & 0x4000 column = column & 0x3FFF if rowRelative: rowindicator = '~' @@ -179,9 +205,40 @@ def ParseLoc(expression): else: colindicator = '' column += 1 - return 'R%s%dC%s%d' % (rowindicator, row, colindicator, column) + if cellrefformat.upper() == 'RC': + result = 'R%s%dC%s%d' % (rowindicator, row, colindicator, column) + elif cellrefformat.upper() == 'LN': + column -= 1 + first = int(column / 26) + second = column % 26 + if first == 0: + result = '' + else: + result = chr(first + ord('A')) + result += chr(second + ord('A')) + result = '%s%d' % (result, row) + else: + raise Exception('Unknown cell reference format: %s' % cellrefformat) + return result, expression[formatsize:] + +def StackBinary(stack, operator): + if len(stack) < 2: + stack.append('*STACKERROR* not enough operands for operator: %s' % operator) + else: + operand2 = stack.pop() + operand1 = stack.pop() + stack.append(operand1 + operator + operand2) -def ParseExpression(expression): +def StackFunction(stack, function, arity): + if len(stack) < arity: + stack.append('*STACKERROR* not enough arguments for function: %s' % function) + else: + arguments = [] + for i in range(arity): + arguments.insert(0, stack.pop()) + stack.append('%s(%s)' % (function, ','.join(arguments))) + +def ParseExpression(expression, cellrefformat): dTokens = { 0x01: 'ptgExp', 0x02: 'ptgTbl', @@ -391,7 +448,7 @@ def ParseExpression(expression): 0x0069: 'ISREF', 0x006A: 'GET.FORMULA', 0x006B: 'GET.NAME', -0x006C: 'SET.VALUE', +0x006C: ['SET.VALUE', 2], 0x006D: 'LOG', 0x006E: 'EXEC', 0x006F: 'CHAR', @@ -656,6 +713,7 @@ def ParseExpression(expression): 0x0179: 'ROUNDBAHTUP', 0x017A: 'THAIYEAR', 0x017B: 'RTD', +0x01E0: 'IFERROR', #https://docs.microsoft.com/en-us/openspecs/office_file_formats/ms-xls/0b8acba5-86d2-4854-836e-0afaee743d44 0x8000: 'BEEP', @@ -1056,34 +1114,52 @@ def ParseExpression(expression): 0x8328: 'HIDEALL.INKANNOTS', } + def GetFunctionName(functionid): + if functionid in dFunctions: + name = dFunctions[functionid] + if isinstance(name, list): + return name[0] + else: + name = '*UNKNOWN FUNCTION*' + return name + + def GetFunctionArity(functionid): + arity = 1 + if functionid in dFunctions: + entry = dFunctions[functionid] + if isinstance(entry, list): + arity = entry[1] + return arity + result = '' + stack = [] while len(expression) > 0: ptgid = P23Ord(expression[0]) expression = expression[1:] if ptgid in dTokens: result += dTokens[ptgid] + ' ' if ptgid == 0x03: # ptgAdd https://docs.microsoft.com/en-us/openspecs/office_file_formats/ms-xls/27db2f45-11e8-4238-94ed-92fd9c5721fb - pass + StackBinary(stack, '+') elif ptgid == 0x4: # ptgSub - pass + StackBinary(stack, '-') elif ptgid == 0x5: # ptgMul - pass + StackBinary(stack, '*') elif ptgid == 0x6: # ptgDiv - pass + StackBinary(stack, '/') elif ptgid == 0x8: # ptgConcat - pass + StackBinary(stack, '&') elif ptgid == 0x09: # ptgLt https://docs.microsoft.com/en-us/openspecs/office_file_formats/ms-xls/28de4981-1352-4a5e-a3b7-f15a8a6ce7fb - pass + StackBinary(stack, '<') elif ptgid == 0x0A: # ptgLE - pass + StackBinary(stack, '<=') elif ptgid == 0x0B: # ptgEQ - pass + StackBinary(stack, '=') elif ptgid == 0x0C: # ptgGE - pass + StackBinary(stack, '>=') elif ptgid == 0x0D: # ptgGT - pass + StackBinary(stack, '>') elif ptgid == 0x0E: # ptgNE - pass + StackBinary(stack, '<>') elif ptgid == 0x17: # ptgStr https://docs.microsoft.com/en-us/openspecs/office_file_formats/ms-xls/87c2a057-705c-4473-a168-6d5fac4a9eba length = P23Ord(expression[0]) expression = expression[1:] @@ -1109,24 +1185,31 @@ def ParseExpression(expression): elif ptgid == 0x1d: # ptgBool https://docs.microsoft.com/en-us/openspecs/office_file_formats/ms-xls/d59e28db-4d6f-4c86-bcc9-c8a783e352ec result += '%s ' % (IFF(P23Ord(expression[0]), 'TRUE', 'FALSE')) expression = expression[1:] - elif ptgid == 0x1e: - result += '%d ' % (P23Ord(expression[0]) + P23Ord(expression[1]) * 0x100) + elif ptgid == 0x1e: #ptgInt + value = P23Ord(expression[0]) + P23Ord(expression[1]) * 0x100 + result += '%d ' % (value) expression = expression[2:] + stack.append(str(value)) elif ptgid == 0x41: functionid = P23Ord(expression[0]) + P23Ord(expression[1]) * 0x100 - result += '%s (0x%04x) ' % (dFunctions.get(functionid, '*UNKNOWN FUNCTION*'), functionid) + result += '%s (0x%04x) ' % (GetFunctionName(functionid), functionid) expression = expression[2:] + StackFunction(stack, GetFunctionName(functionid), GetFunctionArity(functionid)) elif ptgid == 0x22 or ptgid == 0x42 or ptgid == 0x62: functionid = P23Ord(expression[1]) + P23Ord(expression[2]) * 0x100 - result += 'args %d func %s (0x%04x) ' % (P23Ord(expression[0]), dFunctions.get(functionid, '*UNKNOWN FUNCTION*'), functionid) + numberOfArguments = P23Ord(expression[0]) + result += 'args %d func %s (0x%04x) ' % (numberOfArguments, GetFunctionName(functionid), functionid) expression = expression[3:] if functionid == 0x806D: expression = expression[9:] + StackFunction(stack, GetFunctionName(functionid), numberOfArguments) elif ptgid == 0x23: # ptgName https://docs.microsoft.com/en-us/openspecs/office_file_formats/ms-xls/5f05c166-dfe3-4bbf-85aa-31c09c0258c0 result += '0x%08x ' % (struct.unpack('> 2) / divider + else: + return struct.unpack('= 21: - formatcodes = 'HH' - formatsize = struct.calcsize(formatcodes) - row, column = struct.unpack(formatcodes, data[0:formatsize]) + cellref, dummy = ParseLoc(data, options.cellrefformat, True) formatcodes = 'H' formatsize = struct.calcsize(formatcodes) length = struct.unpack(formatcodes, data[20:20 + formatsize])[0] expression = data[22:] - line += ' - R%dC%d len=%d %s' % (row + 1, column + 1, length, ParseExpression(expression)) + parsedExpression, stack = ParseExpression(expression, options.cellrefformat) + line += ' - %s len=%d %s' % (cellref, length, parsedExpression) + if len(stack) == 1: + csvrow = [currentSheetname, cellref, stack[0], ''] + else: + csvrow = [currentSheetname, cellref, repr(stack), ''] if options.formulabytes: data_hex = P23Decode(binascii.b2a_hex(data)) spaced_data_hex = ' '.join(a+b for a,b in zip(data_hex[::2], data_hex[1::2])) @@ -1508,17 +1624,22 @@ class cBIFF(cPluginParent): # FORMULA record #a# difference BIFF4 and BIFF5+ if opcode == 0x18 and len(data) >= 16: - if P23Ord(data[0]) & 0x20: + flags = P23Ord(data[0]) + lnName = P23Ord(data[3]) + szFormula = P23Ord(data[4]) + P23Ord(data[5]) * 0x100 + offset = 14 + if P23Ord(data[offset]) == 0: #a# hack with BIFF8 Unicode + offset = 15 + if flags & 0x20: dBuildInNames = {1: 'Auto_Open', 2: 'Auto_Close'} - code = P23Ord(data[14]) - if code == 0: #a# hack with BIFF8 Unicode - code = P23Ord(data[15]) + code = P23Ord(data[offset]) line += ' - build-in-name %d %s' % (code, dBuildInNames.get(code, '?')) else: - offset = 14 - if P23Ord(data[offset]) == 0: - offset = 15 - line += ' - %s' % (P23Decode(data[offset:offset+P23Ord(data[3])])) + line += ' - %s' % (P23Decode(data[offset:offset+lnName])) + if flags & 0x01: + line += ' hidden' + parsedExpression, stack = ParseExpression(data[offset+lnName:offset+lnName+szFormula], options.cellrefformat) + line += ' len=%d %s' % (szFormula, parsedExpression) # FILEPASS record if opcode == 0x2f: @@ -1526,11 +1647,27 @@ class cBIFF(cPluginParent): # BOUNDSHEET record if opcode == 0x85 and len(data) >= 6: + formatcodes = '= 4: + formatcodes = 'H' + formatsize = struct.calcsize(formatcodes) + dt = struct.unpack(formatcodes, data[2:2 + formatsize])[0] + dStreamType = {5: 'workbook', 0x10: 'dialog sheet/worksheet', 0x20: 'chart sheet', 0x40: 'macro sheet'} + line += ' - %s' % (dStreamType.get(dt, '0x%04x' % dt)) + if positionBIFFRecord in dSheetNames: + line += ' - %s' % (dSheetNames[positionBIFFRecord]) + currentSheetname = dSheetNames[positionBIFFRecord] # STRING record if opcode == 0x207 and len(data) >= 4: @@ -1544,9 +1681,32 @@ class cBIFF(cPluginParent): strings += b' '.join(values[1]) line += ' - %s' % strings + # number record + if opcode == 0x0203: + cellref, data2 = ParseLoc(data, options.cellrefformat, True) + formatcodes = '