Commit 6d7514ce799d78c0b23c4d187554423ca78b3149

Authored by decalage2
1 parent 8b97e45b

plugin_biff: updated to v0.0.11 (improved formula parsing)

oletools/olevba.py
... ... @@ -227,7 +227,7 @@ from __future__ import print_function
227 227 # 2020-01-31 v0.56 KS: - added option --no-xlm, improved MHT detection
228 228 # 2020-03-22 PL: - uses plugin_biff to display DCONN objects and their URL
229 229  
230   -__version__ = '0.56dev3'
  230 +__version__ = '0.56dev4'
231 231  
232 232 #------------------------------------------------------------------------------
233 233 # TODO:
... ...
oletools/thirdparty/oledump/oledump_extract.py 0 → 100644
  1 +#!/usr/bin/env python
  2 +
  3 +# Small extract of oledump.py to be able to run plugin_biff from olevba
  4 +
  5 +__description__ = 'Analyze OLE files (Compound Binary Files)'
  6 +__author__ = 'Didier Stevens'
  7 +__version__ = '0.0.49'
  8 +__date__ = '2020/03/28'
  9 +
  10 +"""
  11 +
  12 +Source code put in public domain by Didier Stevens, no Copyright
  13 +https://DidierStevens.com
  14 +Use at your own risk
  15 +"""
  16 +
class cPluginParent:
    """Parent class for plugins (the BIFF plugin class derives from it).

    Declares two class-level flags, both False by default.
    """

    macroOnly, indexQuiet = False, False
  20 +
# Registry of plugin classes, filled in through AddPlugin().
plugins = []


def AddPlugin(cClass):
    """Append the plugin class *cClass* to the module-level ``plugins`` list."""
    # list.append mutates the existing list object in place, so the name
    # ``plugins`` is never rebound and no ``global`` declaration is needed.
    plugins.append(cClass)
  27 +
  28 +
  29 +# CIC: Call If Callable
def CIC(expression):
    """Call If Callable.

    Return the result of calling *expression* (with no arguments) when it is
    callable; otherwise return *expression* unchanged.
    """
    return expression() if callable(expression) else expression
  35 +
  36 +# IFF: IF Function
def IFF(expression, valueTrue, valueFalse):
    """IF Function.

    Pick *valueTrue* when *expression* is truthy, else *valueFalse*, and pass
    the picked value through CIC() so that a callable is only invoked when
    its branch is actually selected.
    """
    selected = valueTrue if expression else valueFalse
    return CIC(selected)
  42 +
def P23Ord(value):
    """Return the integer code of *value*.

    An integer is returned as is; anything else is passed to ord(), so a
    one-character string or a length-1 bytes object is converted to its
    code point / byte value.

    Raises:
        TypeError: if *value* is neither an integer nor acceptable to ord().
    """
    # isinstance() instead of an exact type comparison so that int
    # subclasses are also recognised as already being integers.
    if isinstance(value, int):
        return value
    return ord(value)
  48 +
def P23Chr(value):
    """Return *value* as a character.

    An integer is converted with chr(); any other value is returned
    unchanged.

    Raises:
        ValueError: if *value* is an integer outside the range chr() accepts.
    """
    # isinstance() instead of an exact type comparison so that int
    # subclasses are converted as well.
    if isinstance(value, int):
        return chr(value)
    return value
... ...
oletools/thirdparty/oledump/plugin_biff.py
... ... @@ -2,8 +2,8 @@
2 2  
3 3 __description__ = 'BIFF plugin for oledump.py'
4 4 __author__ = 'Didier Stevens'
5   -__version__ = '0.0.9'
6   -__date__ = '2020/03/09'
  5 +__version__ = '0.0.11'
  6 +__date__ = '2020/04/06'
7 7  
8 8 # Slightly modified version by Philippe Lagadec to be imported into olevba
9 9  
... ... @@ -28,6 +28,14 @@ History:
28 28 2020/02/23: 0.0.7 performance improvement
29 29 2020/03/08: 0.0.8 added options -X and -d
30 30 2020/03/09: 0.0.9 improved formula parsing; Python 3 bugfixes
  31 + 2020/03/27: 0.0.10 improved formula parsing and debug modes. (by @JohnLaTwC)
  32 + 05219f8c047f1dff861634c4b50d4f6978c87c35f4c14d21ee9d757cac9280cf (ptgConcat)
  33 + 94b26003699efba54ced98006379a230d1154f340589cc89af7d0cbedb861a53 (encoding, ptgFuncVarA, ptgNameX)
  34 + d3c1627ca2775d98717eb1abf2b70aedf383845d87993c6b924f2f55d9d4d696 (ptgArea)
  35 + 01761b06c24baa818b0a75059e745871246a5e9c6ce0243ad96e8632342cbb59 (ptgFuncVarA)
  36 + d3c1627ca2775d98717eb1abf2b70aedf383845d87993c6b924f2f55d9d4d696 (ptgFunc)
  37 + 1d48a42a0b06a087e966b860c8f293a9bf57da8d70f5f83c61242afc5b81eb4f (=SELECT($B$1:$1000:$1000:$B:$B,$B$1))
  38 + 2020/04/06: 0.0.11 Python 2 bugfixes; password protect record FILEPASS
31 39  
32 40 Todo:
33 41 """
... ... @@ -35,44 +43,19 @@ Todo:
35 43 import struct
36 44 import re
37 45 import optparse
  46 +
  47 +# Modifications for olevba:
38 48 import sys
39 49 import binascii
40   -
41   -# A few functions backported from oledump.py:
42   -
43   -class cPluginParent():
44   - macroOnly = False
45   - indexQuiet = False
46   -
47   -# CIC: Call If Callable
48   -def CIC(expression):
49   - if callable(expression):
50   - return expression()
51   - else:
52   - return expression
53   -
54   -# IFF: IF Function
55   -def IFF(expression, valueTrue, valueFalse):
56   - if expression:
57   - return CIC(valueTrue)
58   - else:
59   - return CIC(valueFalse)
60   -
61   -def P23Ord(value):
62   - if type(value) == int:
63   - return value
64   - else:
65   - return ord(value)
66   -
67   -def P23Chr(value):
68   - if type(value) == int:
69   - return chr(value)
70   - else:
71   - return value
  50 +from .oledump_extract import *
  51 +# end modifications
72 52  
def P23Decode(value):
    """Decode *value* to text on Python 3; return it untouched on Python 2.

    On Python 3 the bytes are decoded as UTF-8 first. When that fails the
    data is decoded as windows-1252 instead, with undecodable bytes replaced
    by U+FFFD so that this helper never raises UnicodeDecodeError.
    """
    if sys.version_info[0] <= 2:
        return value
    try:
        return value.decode('utf-8')
    except UnicodeDecodeError:
        # Python's cp1252 codec leaves 0x81, 0x8D, 0x8F, 0x90 and 0x9D
        # undefined; a strict decode would raise again on those bytes.
        return value.decode('windows-1252', 'replace')
78 61  
... ... @@ -115,6 +98,67 @@ def Strings(data, encodings='sL'):
115 98 def ContainsWP23Ord(word, expression):
116 99 return struct.pack('<H', word) in expression
117 100  
  101 +# https://docs.microsoft.com/en-us/openspecs/office_file_formats/ms-xls/75afd109-b1ce-4511-b56f-2d63116f6647
def ParseArea(expression):
    """Render an 8-byte area reference as R1C1-style text.

    The first 8 bytes of *expression* are read as four little-endian
    unsigned 16-bit values: first row, last row, first column, last column.
    In each column value bit 15 (0x8000) is taken as the row-relative flag,
    bit 14 (0x4000) as the column-relative flag and the low 14 bits as the
    column index.

    Absolute coordinates are converted from 0-based to 1-based; relative
    coordinates are left as stored and prefixed with '~'.

    Returns:
        '*ERROR*' when fewer than 8 bytes are supplied;
        'R<row>' when first and last row are equal and the last column is
        256 or more (whole row);
        'C<col>' when first and last column are equal and the last row is
        65536 or more (whole column);
        otherwise 'R<row>C<col>' for the first cell only (the last cell of
        the area is not part of the output).
    """
    # Explicit little-endian format: without '<' struct uses the byte order
    # of the host, which byte-swaps every field on big-endian machines.
    formatcodes = '<HHHH'
    formatsize = struct.calcsize(formatcodes)
    if len(expression) < formatsize:
        return '*ERROR*'
    row1, row2, col1, col2 = struct.unpack(formatcodes, expression[0:formatsize])

    # The relative flags live in the two top bits of the column fields.
    row1Relative = col1 & 0x8000
    col1Relative = col1 & 0x4000
    row2Relative = col2 & 0x8000
    col2Relative = col2 & 0x4000
    col1 &= 0x3FFF
    col2 &= 0x3FFF

    row1indicator = '~' if row1Relative else ''
    col1indicator = '~' if col1Relative else ''

    # Only absolute coordinates are shifted to 1-based numbering.
    if not row1Relative:
        row1 += 1
    if not col1Relative:
        col1 += 1
    if not row2Relative:
        row2 += 1
    if not col2Relative:
        col2 += 1

    if row1 == row2 and col2 >= 256:
        return 'R%s%d' % (row1indicator, row1)
    if col1 == col2 and row2 >= 65536:
        return 'C%s%d' % (col1indicator, col1)

    return 'R%s%dC%s%d' % (row1indicator, row1, col1indicator, col1)
  142 +
  143 +# https://docs.microsoft.com/en-us/openspecs/office_file_formats/ms-xls/6e5eed10-5b77-43d6-8dd0-37345f8654ad
def ParseLocRelU(expression):
    """Render a 4-byte cell location as absolute R1C1-style text.

    The first 4 bytes of *expression* are read as two little-endian unsigned
    16-bit values, row then column, and both are converted from 0-based to
    1-based.

    Returns:
        'R<row>C<col>', or '*ERROR*' when fewer than 4 bytes are supplied
        (same convention as ParseArea) instead of raising IndexError.
    """
    formatcodes = '<HH'
    formatsize = struct.calcsize(formatcodes)
    if len(expression) < formatsize:
        return '*ERROR*'
    row, column = struct.unpack(formatcodes, expression[0:formatsize])
    # NOTE(review): the previous code had the decoding of the row/column
    # relative flags commented out and hard-coded both to False, so every
    # location was printed as absolute with the full 16-bit column value.
    # That behavior is kept here -- confirm whether the flags should be
    # decoded and masked off the column.
    return 'R%dC%d' % (row + 1, column + 1)
  161 +
118 162 #https://docs.microsoft.com/en-us/openspecs/office_file_formats/ms-xls/6e5eed10-5b77-43d6-8dd0-37345f8654ad
119 163 def ParseLoc(expression):
120 164 formatcodes = 'HH'
... ... @@ -160,6 +204,7 @@ def ParseExpression(expression):
160 204 0x15: 'ptgParen',
161 205 0x16: 'ptgMissArg',
162 206 0x17: 'ptgStr',
  207 +0x18: 'ptgExtend',
163 208 0x19: 'ptgAttr',
164 209 0x1A: 'ptgSheet',
165 210 0x1B: 'ptgEndSheet',
... ... @@ -1017,15 +1062,38 @@ def ParseExpression(expression):
1017 1062 result += dTokens[ptgid] + ' '
1018 1063 if ptgid == 0x03: # ptgAdd https://docs.microsoft.com/en-us/openspecs/office_file_formats/ms-xls/27db2f45-11e8-4238-94ed-92fd9c5721fb
1019 1064 pass
  1065 + elif ptgid == 0x4: # ptgSub
  1066 + pass
  1067 + elif ptgid == 0x5: # ptgMul
  1068 + pass
  1069 + elif ptgid == 0x6: # ptgDiv
  1070 + pass
  1071 + elif ptgid == 0x8: # ptgConcat
  1072 + pass
1020 1073 elif ptgid == 0x09: # ptgLt https://docs.microsoft.com/en-us/openspecs/office_file_formats/ms-xls/28de4981-1352-4a5e-a3b7-f15a8a6ce7fb
1021 1074 pass
  1075 + elif ptgid == 0x0A: # ptgLE
  1076 + pass
  1077 + elif ptgid == 0x0B: # ptgEQ
  1078 + pass
  1079 + elif ptgid == 0x0C: # ptgGE
  1080 + pass
  1081 + elif ptgid == 0x0D: # ptgGT
  1082 + pass
  1083 + elif ptgid == 0x0E: # ptgNE
  1084 + pass
1022 1085 elif ptgid == 0x17: # ptgStr https://docs.microsoft.com/en-us/openspecs/office_file_formats/ms-xls/87c2a057-705c-4473-a168-6d5fac4a9eba
1023 1086 length = P23Ord(expression[0])
1024 1087 expression = expression[1:]
1025 1088 if P23Ord(expression[0]) == 0: # probably BIFF8 -> UNICODE (compressed)
1026 1089 expression = expression[1:]
1027   - result += '"%s" ' % P23Decode(expression[:length])
1028   - expression = expression[length:]
  1090 + result += '"%s" ' % P23Decode(expression[:length])
  1091 + expression = expression[length:]
  1092 + elif P23Ord(expression[0]) == 1: # if 1, then double byte chars
  1093 + # doublebyte check: https://docs.microsoft.com/en-us/openspecs/office_file_formats/ms-xls/05162858-0ca9-44cb-bb07-a720928f63f8
  1094 + expression = expression[1:]
  1095 + result += '"%s" ' % P23Decode(expression[:length*2])
  1096 + expression = expression[length*2:]
1029 1097 elif ptgid == 0x19:
1030 1098 grbit = P23Ord(expression[0])
1031 1099 expression = expression[1:]
... ... @@ -1046,10 +1114,12 @@ def ParseExpression(expression):
1046 1114 functionid = P23Ord(expression[0]) + P23Ord(expression[1]) * 0x100
1047 1115 result += '%s (0x%04x) ' % (dFunctions.get(functionid, '*UNKNOWN FUNCTION*'), functionid)
1048 1116 expression = expression[2:]
1049   - elif ptgid == 0x22 or ptgid == 0x42:
  1117 + elif ptgid == 0x22 or ptgid == 0x42 or ptgid == 0x62:
1050 1118 functionid = P23Ord(expression[1]) + P23Ord(expression[2]) * 0x100
1051 1119 result += 'args %d func %s (0x%04x) ' % (P23Ord(expression[0]), dFunctions.get(functionid, '*UNKNOWN FUNCTION*'), functionid)
1052 1120 expression = expression[3:]
  1121 + if functionid == 0x806D:
  1122 + expression = expression[9:]
1053 1123 elif ptgid == 0x23: # ptgName https://docs.microsoft.com/en-us/openspecs/office_file_formats/ms-xls/5f05c166-dfe3-4bbf-85aa-31c09c0258c0
1054 1124 result += '0x%08x ' % (struct.unpack('<I', expression[0:4]))
1055 1125 expression = expression[4:]
... ... @@ -1057,8 +1127,9 @@ def ParseExpression(expression):
1057 1127 result += 'FLOAT '
1058 1128 expression = expression[8:]
1059 1129 elif ptgid == 0x26:
1060   - expression = expression[4:]
1061   - expression = expression[P23Ord(expression[0]) + P23Ord(expression[1]) * 0x100:]
  1130 + ## expression = expression[4:]
  1131 + ## expression = expression[P23Ord(expression[0]) + P23Ord(expression[1]) * 0x100:]
  1132 + expression = expression[6:]
1062 1133 result += 'REFERENCE-EXPRESSION '
1063 1134 elif ptgid == 0x01:
1064 1135 formatcodes = 'HH'
... ... @@ -1067,11 +1138,32 @@ def ParseExpression(expression):
1067 1138 expression = expression[formatsize:]
1068 1139 result += 'R%dC%d ' % (row + 1, column + 1)
1069 1140 elif ptgid == 0x24 or ptgid == 0x44:
1070   - result += '%s ' % ParseLoc(expression)
  1141 + result += '%s ' % ParseLocRelU(expression)
1071 1142 expression = expression[4:]
  1143 + elif ptgid == 0x11: # ptgRange
  1144 + pass
  1145 + elif ptgid == 0x25: # ptgArea
  1146 + result += '%s ' % ParseArea(expression[0:8])
  1147 + expression = expression[8:]
1072 1148 elif ptgid == 0x3A or ptgid == 0x5A:
1073 1149 result += '%s ' % ParseLoc(expression[2:])
1074 1150 expression = expression[6:]
  1151 + elif ptgid == 0x39: # PtgNameX
  1152 + expression = expression[2:]
  1153 + formatcodes = 'H'
  1154 + formatsize = struct.calcsize(formatcodes)
  1155 + nameindex = struct.unpack(formatcodes, expression[0:formatsize])[0]
  1156 + result += ' NAMEIDX %d ' % nameindex
  1157 + expression = expression[4:]
  1158 + elif ptgid == 0x21: #ptgFunc
  1159 + functionid = P23Ord(expression[0]) + P23Ord(expression[1]) * 0x100
  1160 + result += '%s ' % dFunctions.get(functionid, '*UNKNOWN FUNCTION*')
  1161 + expression = expression[2:]
  1162 + elif ptgid == 0x61 or ptgid == 0x62: # ptgFuncVar ptgFuncVarA
  1163 + params_count = P23Ord(expression[0])
  1164 + functionid = P23Ord(expression[1]) + P23Ord(expression[2]) * 0x100
  1165 + result += '%s ' % dFunctions.get(functionid, '*UNKNOWN FUNCTION*')
  1166 + expression = expression[(2+params_count):]
1075 1167 else:
1076 1168 break
1077 1169 else:
... ... @@ -1101,6 +1193,7 @@ class cBIFF(cPluginParent):
1101 1193 def Analyze(self):
1102 1194 result = []
1103 1195 macros4Found = False
  1196 + filepassFound = False
1104 1197 dOpcodes = {
1105 1198 0x06: 'FORMULA : Cell Formula',
1106 1199 0x0A: 'EOF : End of File',
... ... @@ -1370,6 +1463,7 @@ class cBIFF(cPluginParent):
1370 1463 oParser.add_option('-s', '--strings', action='store_true', default=False, help='Dump strings')
1371 1464 oParser.add_option('-a', '--hexascii', action='store_true', default=False, help='Dump hex ascii')
1372 1465 oParser.add_option('-X', '--hex', action='store_true', default=False, help='Dump hex without whitespace')
  1466 + oParser.add_option('-b', '--formulabytes', action='store_true', default=False, help='Dump formula bytes')
1373 1467 oParser.add_option('-d', '--dump', action='store_true', default=False, help='Dump')
1374 1468 oParser.add_option('-x', '--xlm', action='store_true', default=False, help='Select all records relevant for Excel 4.0 macros')
1375 1469 oParser.add_option('-o', '--opcode', type=str, default='', help='Opcode to filter for')
... ... @@ -1405,6 +1499,10 @@ class cBIFF(cPluginParent):
1405 1499 length = struct.unpack(formatcodes, data[20:20 + formatsize])[0]
1406 1500 expression = data[22:]
1407 1501 line += ' - R%dC%d len=%d %s' % (row + 1, column + 1, length, ParseExpression(expression))
  1502 + if options.formulabytes:
  1503 + data_hex = P23Decode(binascii.b2a_hex(data))
  1504 + spaced_data_hex = ' '.join(a+b for a,b in zip(data_hex[::2], data_hex[1::2]))
  1505 + line += '\nFORMULA BYTES: %s' % spaced_data_hex
1408 1506  
1409 1507 # FORMULA record #a# difference BIFF4 and BIFF5+
1410 1508 if opcode == 0x18 and len(data) >= 16:
... ... @@ -1420,6 +1518,10 @@ class cBIFF(cPluginParent):
1420 1518 offset = 15
1421 1519 line += ' - %s' % (P23Decode(data[offset:offset+P23Ord(data[3])]))
1422 1520  
  1521 + # FILEPASS record
  1522 + if opcode == 0x2f:
  1523 + filepassFound = True
  1524 +
1423 1525 # BOUNDSHEET record
1424 1526 if opcode == 0x85 and len(data) >= 6:
1425 1527 dSheetType = {0: 'worksheet or dialog sheet', 1: 'Excel 4.0 macro sheet', 2: 'chart', 6: 'Visual Basic module'}
... ... @@ -1433,7 +1535,7 @@ class cBIFF(cPluginParent):
1433 1535 values = list(Strings(data[3:]).values())
1434 1536 strings = b''
1435 1537 if values[0] != []:
1436   - strings += b' '.join(values[0])
  1538 + strings = values[0][0].encode()
1437 1539 if values[1] != []:
1438 1540 if strings != b'':
1439 1541 strings += b' '
... ... @@ -1457,9 +1559,11 @@ class cBIFF(cPluginParent):
1457 1559 elif options.dump:
1458 1560 result = data
1459 1561  
1460   - if options.xlm and not macros4Found:
  1562 + if options.xlm and filepassFound:
  1563 + result = ['FILEPASS record: file is password protected']
  1564 + elif options.xlm and not macros4Found:
1461 1565 result = []
1462 1566  
1463 1567 return result
1464 1568  
1465   -#AddPlugin(cBIFF)
  1569 +AddPlugin(cBIFF)
... ...
setup.py
... ... @@ -52,7 +52,7 @@ import os, fnmatch
52 52 #--- METADATA -----------------------------------------------------------------
53 53  
54 54 name = "oletools"
55   -version = '0.56dev3'
  55 +version = '0.56dev4'
56 56 desc = "Python tools to analyze security characteristics of MS Office and OLE files (also called Structured Storage, Compound File Binary Format or Compound Document File Format), for Malware Analysis and Incident Response #DFIR"
57 57 long_desc = open('oletools/README.rst').read()
58 58 author = "Philippe Lagadec"
... ...