Commit 1624ef07959340e49ffdf27f5fac49c73c0760a0

Authored by decalage2
1 parent 6d959ef1

olevba: updated plugin_biff to v0.0.12 to improve Excel 4/XLM macros parsing, added detection of FORMULA.FILL
oletools/olevba.py
... ... @@ -227,7 +227,7 @@ from __future__ import print_function
227 227 # 2020-01-31 v0.56 KS: - added option --no-xlm, improved MHT detection
228 228 # 2020-03-22 PL: - uses plugin_biff to display DCONN objects and their URL
229 229  
230   -__version__ = '0.56dev4'
  230 +__version__ = '0.56dev5'
231 231  
232 232 #------------------------------------------------------------------------------
233 233 # TODO:
... ... @@ -795,6 +795,8 @@ SUSPICIOUS_KEYWORDS = {
795 795 'DisableUnsafeLocationsInPV', 'blockcontentexecutionfrominternet'),
796 796 'May attempt to modify the VBA code (self-modification)':
797 797 ('VBProject', 'VBComponents', 'CodeModule', 'AddFromString'),
  798 + 'May modify Excel 4 Macro formulas at runtime (XLM/XLF)':
  799 + ('FORMULA.FILL',),
798 800 }
799 801  
800 802 # Suspicious Keywords to be searched for directly as regex, without escaping
... ... @@ -3249,13 +3251,21 @@ class VBA_Parser(object):
3249 3251 data = self.ole_file.openstream(excel_stream).read()
3250 3252 log.debug('Running BIFF plugin from oledump')
3251 3253 try:
3252   - biff_plugin = cBIFF(name=[excel_stream], stream=data, options='-x')
  3254 + # starting from plugin_biff 0.0.12, we use the CSV output (-c) instead of -x
  3255 + # biff_plugin = cBIFF(name=[excel_stream], stream=data, options='-x')
  3256 + # First let's get the list of boundsheets, and check if there are Excel 4 macros:
  3257 + biff_plugin = cBIFF(name=[excel_stream], stream=data, options='-o BOUNDSHEET')
3253 3258 self.xlm_macros = biff_plugin.Analyze()
3254   - if len(self.xlm_macros)>0:
  3259 + if "Excel 4.0 macro sheet" in '\n'.join(self.xlm_macros):
3255 3260 log.debug('Found XLM macros')
  3261 + # get the list of labels, which may contain the "Auto_Open" trigger
  3262 + biff_plugin = cBIFF(name=[excel_stream], stream=data, options='-o LABEL')
  3263 + self.xlm_macros += biff_plugin.Analyze()
  3264 + biff_plugin = cBIFF(name=[excel_stream], stream=data, options='-c -r LN')
  3265 + self.xlm_macros += biff_plugin.Analyze()
3256 3266 # we run plugin_biff again, this time to search DCONN objects and get their URLs, if any:
3257 3267 # ref: https://inquest.net/blog/2020/03/18/Getting-Sneakier-Hidden-Sheets-Data-Connections-and-XLM-Macros
3258   - biff_plugin = cBIFF(name=[excel_stream], stream=data, options='-o 876 -s')
  3268 + biff_plugin = cBIFF(name=[excel_stream], stream=data, options='-o DCONN -s')
3259 3269 self.xlm_macros += biff_plugin.Analyze()
3260 3270 return True
3261 3271 except:
... ...
oletools/thirdparty/oledump/plugin_biff.py
1 1 #!/usr/bin/env python
2 2  
3   -from __future__ import print_function
4   -
5 3 __description__ = 'BIFF plugin for oledump.py'
6 4 __author__ = 'Didier Stevens'
7   -__version__ = '0.0.11'
8   -__date__ = '2020/04/06'
  5 +__version__ = '0.0.12'
  6 +__date__ = '2020/05/18'
9 7  
10 8 # Slightly modified version by Philippe Lagadec to be imported into olevba
11 9  
... ... @@ -38,6 +36,9 @@ History:
38 36 d3c1627ca2775d98717eb1abf2b70aedf383845d87993c6b924f2f55d9d4d696 (ptgFunc)
39 37 1d48a42a0b06a087e966b860c8f293a9bf57da8d70f5f83c61242afc5b81eb4f (=SELECT($B$1:$1000:$1000:$B:$B,$B$1))
40 38 2020/04/06: 0.0.11 Python 2 bugfixes; password protect record FILEPASS
  39 + 2020/05/16: 0.0.12 option -c
  40 + 2020/05/17: option -r
  41 + 2020/05/18: continue
41 42  
42 43 Todo:
43 44 """
... ... @@ -52,6 +53,9 @@ import binascii
52 53 from .oledump_extract import *
53 54 # end modifications
54 55  
  56 +DEFAULT_SEPARATOR = ','
  57 +QUOTE = '"'
  58 +
55 59 def P23Decode(value):
56 60 if sys.version_info[0] > 2:
57 61 try:
... ... @@ -61,6 +65,24 @@ def P23Decode(value):
61 65 else:
62 66 return value
63 67  
  68 +def ToString(value):
  69 + if isinstance(value, str):
  70 + return value
  71 + else:
  72 + return str(value)
  73 +
  74 +def Quote(value, separator, quote):
  75 + value = ToString(value)
  76 + if len(value) > 1 and value[0] == quote and value[-1] == quote:
  77 + return value
  78 + if separator in value or value == '':
  79 + return quote + value + quote
  80 + else:
  81 + return value
  82 +
  83 +def MakeCSVLine(row, separator, quote):
  84 + return separator.join([Quote(value, separator, quote) for value in row])
  85 +
64 86 def CombineHexASCII(hexDump, asciiDump, length):
65 87 if hexDump == '':
66 88 return ''
... ... @@ -146,9 +168,9 @@ def ParseArea(expression):
146 168 def ParseLocRelU(expression):
147 169 row = P23Ord(expression[0]) + P23Ord(expression[1]) * 0x100
148 170 column = P23Ord(expression[2]) + P23Ord(expression[3]) * 0x100
149   - rowRelative = False #P23Ord(expression[3]) & 0x0001
150   - colRelative = False #P23Ord(expression[3]) & 0x0002
151   - #column = column & 0xFFFC
  171 + rowRelative = False #column & 0x8000
  172 + colRelative = False #column & 0x4000
  173 + column = column & 0x3FFF
152 174 if rowRelative:
153 175 rowindicator = '~'
154 176 else:
... ... @@ -162,12 +184,16 @@ def ParseLocRelU(expression):
162 184 return 'R%s%dC%s%d' % (rowindicator, row, colindicator, column)
163 185  
164 186 #https://docs.microsoft.com/en-us/openspecs/office_file_formats/ms-xls/6e5eed10-5b77-43d6-8dd0-37345f8654ad
165   -def ParseLoc(expression):
  187 +def ParseLoc(expression, cellrefformat, ignoreRelFlags=False):
166 188 formatcodes = 'HH'
167 189 formatsize = struct.calcsize(formatcodes)
168 190 row, column = struct.unpack(formatcodes, expression[0:formatsize])
169   - rowRelative = column & 0x8000
170   - colRelative = column & 0x4000
  191 + if ignoreRelFlags:
  192 + rowRelative = False
  193 + colRelative = False
  194 + else:
  195 + rowRelative = column & 0x8000
  196 + colRelative = column & 0x4000
171 197 column = column & 0x3FFF
172 198 if rowRelative:
173 199 rowindicator = '~'
... ... @@ -179,9 +205,40 @@ def ParseLoc(expression):
179 205 else:
180 206 colindicator = ''
181 207 column += 1
182   - return 'R%s%dC%s%d' % (rowindicator, row, colindicator, column)
  208 + if cellrefformat.upper() == 'RC':
  209 + result = 'R%s%dC%s%d' % (rowindicator, row, colindicator, column)
  210 + elif cellrefformat.upper() == 'LN':
  211 + column -= 1
  212 + first = int(column / 26)
  213 + second = column % 26
  214 + if first == 0:
  215 + result = ''
  216 + else:
  217 + result = chr(first + ord('A'))
  218 + result += chr(second + ord('A'))
  219 + result = '%s%d' % (result, row)
  220 + else:
  221 + raise Exception('Unknown cell reference format: %s' % cellrefformat)
  222 + return result, expression[formatsize:]
  223 +
  224 +def StackBinary(stack, operator):
  225 + if len(stack) < 2:
  226 + stack.append('*STACKERROR* not enough operands for operator: %s' % operator)
  227 + else:
  228 + operand2 = stack.pop()
  229 + operand1 = stack.pop()
  230 + stack.append(operand1 + operator + operand2)
183 231  
184   -def ParseExpression(expression):
  232 +def StackFunction(stack, function, arity):
  233 + if len(stack) < arity:
  234 + stack.append('*STACKERROR* not enough arguments for function: %s' % function)
  235 + else:
  236 + arguments = []
  237 + for i in range(arity):
  238 + arguments.insert(0, stack.pop())
  239 + stack.append('%s(%s)' % (function, ','.join(arguments)))
  240 +
  241 +def ParseExpression(expression, cellrefformat):
185 242 dTokens = {
186 243 0x01: 'ptgExp',
187 244 0x02: 'ptgTbl',
... ... @@ -391,7 +448,7 @@ def ParseExpression(expression):
391 448 0x0069: 'ISREF',
392 449 0x006A: 'GET.FORMULA',
393 450 0x006B: 'GET.NAME',
394   -0x006C: 'SET.VALUE',
  451 +0x006C: ['SET.VALUE', 2],
395 452 0x006D: 'LOG',
396 453 0x006E: 'EXEC',
397 454 0x006F: 'CHAR',
... ... @@ -656,6 +713,7 @@ def ParseExpression(expression):
656 713 0x0179: 'ROUNDBAHTUP',
657 714 0x017A: 'THAIYEAR',
658 715 0x017B: 'RTD',
  716 +0x01E0: 'IFERROR',
659 717  
660 718 #https://docs.microsoft.com/en-us/openspecs/office_file_formats/ms-xls/0b8acba5-86d2-4854-836e-0afaee743d44
661 719 0x8000: 'BEEP',
... ... @@ -1056,34 +1114,52 @@ def ParseExpression(expression):
1056 1114 0x8328: 'HIDEALL.INKANNOTS',
1057 1115 }
1058 1116  
  1117 + def GetFunctionName(functionid):
  1118 + if functionid in dFunctions:
  1119 + name = dFunctions[functionid]
  1120 + if isinstance(name, list):
  1121 + return name[0]
  1122 + else:
  1123 + name = '*UNKNOWN FUNCTION*'
  1124 + return name
  1125 +
  1126 + def GetFunctionArity(functionid):
  1127 + arity = 1
  1128 + if functionid in dFunctions:
  1129 + entry = dFunctions[functionid]
  1130 + if isinstance(entry, list):
  1131 + arity = entry[1]
  1132 + return arity
  1133 +
1059 1134 result = ''
  1135 + stack = []
1060 1136 while len(expression) > 0:
1061 1137 ptgid = P23Ord(expression[0])
1062 1138 expression = expression[1:]
1063 1139 if ptgid in dTokens:
1064 1140 result += dTokens[ptgid] + ' '
1065 1141 if ptgid == 0x03: # ptgAdd https://docs.microsoft.com/en-us/openspecs/office_file_formats/ms-xls/27db2f45-11e8-4238-94ed-92fd9c5721fb
1066   - pass
  1142 + StackBinary(stack, '+')
1067 1143 elif ptgid == 0x4: # ptgSub
1068   - pass
  1144 + StackBinary(stack, '-')
1069 1145 elif ptgid == 0x5: # ptgMul
1070   - pass
  1146 + StackBinary(stack, '*')
1071 1147 elif ptgid == 0x6: # ptgDiv
1072   - pass
  1148 + StackBinary(stack, '/')
1073 1149 elif ptgid == 0x8: # ptgConcat
1074   - pass
  1150 + StackBinary(stack, '&')
1075 1151 elif ptgid == 0x09: # ptgLt https://docs.microsoft.com/en-us/openspecs/office_file_formats/ms-xls/28de4981-1352-4a5e-a3b7-f15a8a6ce7fb
1076   - pass
  1152 + StackBinary(stack, '<')
1077 1153 elif ptgid == 0x0A: # ptgLE
1078   - pass
  1154 + StackBinary(stack, '<=')
1079 1155 elif ptgid == 0x0B: # ptgEQ
1080   - pass
  1156 + StackBinary(stack, '=')
1081 1157 elif ptgid == 0x0C: # ptgGE
1082   - pass
  1158 + StackBinary(stack, '>=')
1083 1159 elif ptgid == 0x0D: # ptgGT
1084   - pass
  1160 + StackBinary(stack, '>')
1085 1161 elif ptgid == 0x0E: # ptgNE
1086   - pass
  1162 + StackBinary(stack, '<>')
1087 1163 elif ptgid == 0x17: # ptgStr https://docs.microsoft.com/en-us/openspecs/office_file_formats/ms-xls/87c2a057-705c-4473-a168-6d5fac4a9eba
1088 1164 length = P23Ord(expression[0])
1089 1165 expression = expression[1:]
... ... @@ -1109,24 +1185,31 @@ def ParseExpression(expression):
1109 1185 elif ptgid == 0x1d: # ptgBool https://docs.microsoft.com/en-us/openspecs/office_file_formats/ms-xls/d59e28db-4d6f-4c86-bcc9-c8a783e352ec
1110 1186 result += '%s ' % (IFF(P23Ord(expression[0]), 'TRUE', 'FALSE'))
1111 1187 expression = expression[1:]
1112   - elif ptgid == 0x1e:
1113   - result += '%d ' % (P23Ord(expression[0]) + P23Ord(expression[1]) * 0x100)
  1188 + elif ptgid == 0x1e: #ptgInt
  1189 + value = P23Ord(expression[0]) + P23Ord(expression[1]) * 0x100
  1190 + result += '%d ' % (value)
1114 1191 expression = expression[2:]
  1192 + stack.append(str(value))
1115 1193 elif ptgid == 0x41:
1116 1194 functionid = P23Ord(expression[0]) + P23Ord(expression[1]) * 0x100
1117   - result += '%s (0x%04x) ' % (dFunctions.get(functionid, '*UNKNOWN FUNCTION*'), functionid)
  1195 + result += '%s (0x%04x) ' % (GetFunctionName(functionid), functionid)
1118 1196 expression = expression[2:]
  1197 + StackFunction(stack, GetFunctionName(functionid), GetFunctionArity(functionid))
1119 1198 elif ptgid == 0x22 or ptgid == 0x42 or ptgid == 0x62:
1120 1199 functionid = P23Ord(expression[1]) + P23Ord(expression[2]) * 0x100
1121   - result += 'args %d func %s (0x%04x) ' % (P23Ord(expression[0]), dFunctions.get(functionid, '*UNKNOWN FUNCTION*'), functionid)
  1200 + numberOfArguments = P23Ord(expression[0])
  1201 + result += 'args %d func %s (0x%04x) ' % (numberOfArguments, GetFunctionName(functionid), functionid)
1122 1202 expression = expression[3:]
1123 1203 if functionid == 0x806D:
1124 1204 expression = expression[9:]
  1205 + StackFunction(stack, GetFunctionName(functionid), numberOfArguments)
1125 1206 elif ptgid == 0x23: # ptgName https://docs.microsoft.com/en-us/openspecs/office_file_formats/ms-xls/5f05c166-dfe3-4bbf-85aa-31c09c0258c0
1126 1207 result += '0x%08x ' % (struct.unpack('<I', expression[0:4]))
1127 1208 expression = expression[4:]
1128 1209 elif ptgid == 0x1f:
1129   - result += '%f ' % (struct.unpack('d', expression[:8]))
  1210 + value = struct.unpack('<d', expression[:8])[0]
  1211 + result += 'FLOAT %f ' % value
  1212 + stack.append('%.20f' % value)
1130 1213 expression = expression[8:]
1131 1214 elif ptgid == 0x26:
1132 1215 ## expression = expression[4:]
... ... @@ -1134,37 +1217,38 @@ def ParseExpression(expression):
1134 1217 expression = expression[6:]
1135 1218 result += 'REFERENCE-EXPRESSION '
1136 1219 elif ptgid == 0x01:
1137   - formatcodes = 'HH'
1138   - formatsize = struct.calcsize(formatcodes)
1139   - row, column = struct.unpack(formatcodes, expression[0:formatsize])
1140   - expression = expression[formatsize:]
1141   - result += 'R%dC%d ' % (row + 1, column + 1)
1142   - elif ptgid == 0x24 or ptgid == 0x44:
1143   - result += '%s ' % ParseLocRelU(expression)
1144   - expression = expression[4:]
  1220 + cellref, expression = ParseLoc(expression, cellrefformat, True)
  1221 + result += '%s ' % cellref
  1222 + elif ptgid == 0x24 or ptgid == 0x44: #ptgRef #ptgRefV
  1223 + cellref, expression = ParseLoc(expression, cellrefformat, True)
  1224 + result += '%s ' % cellref
  1225 + stack.append(cellref)
1145 1226 elif ptgid == 0x11: # ptgRange
1146 1227 pass
1147 1228 elif ptgid == 0x25: # ptgArea
1148 1229 result += '%s ' % ParseArea(expression[0:8])
1149 1230 expression = expression[8:]
1150   - elif ptgid == 0x3A or ptgid == 0x5A:
1151   - result += '%s ' % ParseLoc(expression[2:])
1152   - expression = expression[6:]
  1231 + elif ptgid == 0x3A or ptgid == 0x5A: # ptgRef3d ptgRef3dV
  1232 + #a# parse sheet reference: expression[:2]
  1233 + expression = expression[2:]
  1234 + cellref, expression = ParseLoc(expression, cellrefformat)
  1235 + result += '!%s ' % cellref
  1236 + stack.append(cellref)
1153 1237 elif ptgid == 0x39: # PtgNameX
1154 1238 expression = expression[2:]
1155 1239 formatcodes = 'H'
1156 1240 formatsize = struct.calcsize(formatcodes)
1157 1241 nameindex = struct.unpack(formatcodes, expression[0:formatsize])[0]
1158   - result += ' NAMEIDX %d ' % nameindex
  1242 + result += ' NAMEIDX %d ' % nameindex
1159 1243 expression = expression[4:]
1160 1244 elif ptgid == 0x21: #ptgFunc
1161 1245 functionid = P23Ord(expression[0]) + P23Ord(expression[1]) * 0x100
1162   - result += '%s ' % dFunctions.get(functionid, '*UNKNOWN FUNCTION*')
  1246 + result += '%s ' % GetFunctionName(functionid)
1163 1247 expression = expression[2:]
1164   - elif ptgid == 0x61 or ptgid == 0x62: # ptgFuncVar ptgFuncVarA
  1248 + elif ptgid == 0x61 or ptgid == 0x62: # ptgFuncVar ptgFuncVarA
1165 1249 params_count = P23Ord(expression[0])
1166 1250 functionid = P23Ord(expression[1]) + P23Ord(expression[2]) * 0x100
1167   - result += '%s ' % dFunctions.get(functionid, '*UNKNOWN FUNCTION*')
  1251 + result += '%s ' % GetFunctionName(functionid)
1168 1252 expression = expression[(2+params_count):]
1169 1253 else:
1170 1254 break
... ... @@ -1172,15 +1256,34 @@ def ParseExpression(expression):
1172 1256 result += '*UNKNOWN TOKEN* 0x%04x' % ptgid
1173 1257 break
1174 1258 if expression == b'':
1175   - return result
  1259 + return result, stack
1176 1260 else:
1177 1261 functions = [dFunctions[functionid] for functionid in [0x6E, 0x95] if ContainsWP23Ord(functionid, expression)]
1178 1262 if functions != []:
1179 1263 message = ' Could contain following functions: ' + ','.join(functions) + ' -'
1180 1264 else:
1181 1265 message = ''
1182   - return result + ' *INCOMPLETE FORMULA PARSING*' + message + ' Remaining, unparsed expression: ' + repr(expression)
  1266 + return result + ' *INCOMPLETE FORMULA PARSING*' + message + ' Remaining, unparsed expression: ' + repr(expression), stack
  1267 +
  1268 +def DecodeRKValue(data):
  1269 + number = P23Ord(data[0])
  1270 + divider = 1.0
  1271 + if number & 0x01:
  1272 + divider = 100.0
  1273 + if number & 0x02:
  1274 + print(repr(data))
  1275 + raise Exception('DecodeRKValue')
  1276 + return (struct.unpack('<i', data)[0] >> 2) / divider
  1277 + else:
  1278 + return struct.unpack('<d', b'\x00\x00\x00\x00' + data)[0] / divider
1183 1279  
  1280 +def ShortXLUnicodeString(data):
  1281 + cch = P23Ord(data[0])
  1282 + highbyte = P23Ord(data[1])
  1283 + if highbyte == 0:
  1284 + return P23Decode(data[2:2 + cch])
  1285 + else:
  1286 + return repr(data[2:2 + cch * 2])
1184 1287  
1185 1288 class cBIFF(cPluginParent):
1186 1289 macroOnly = False
... ... @@ -1194,8 +1297,6 @@ class cBIFF(cPluginParent):
1194 1297  
1195 1298 def Analyze(self):
1196 1299 result = []
1197   - macros4Found = False
1198   - filepassFound = False
1199 1300 dOpcodes = {
1200 1301 0x06: 'FORMULA : Cell Formula',
1201 1302 0x0A: 'EOF : End of File',
... ... @@ -1225,6 +1326,8 @@ class cBIFF(cPluginParent):
1225 1326 0x2A: 'PRINTHEADERS : Print Row/Column Labels',
1226 1327 0x2B: 'PRINTGRIDLINES : Print Gridlines Flag',
1227 1328 0x2F: 'FILEPASS : File Is Password-Protected',
  1329 + 0x31: 'FONT',
  1330 + 0x32: 'FONT2',
1228 1331 0x3C: 'CONTINUE : Continues Long Records',
1229 1332 0x3D: 'WINDOW1 : Window Information',
1230 1333 0x40: 'BACKUP : Save Backup Version of the File',
... ... @@ -1372,6 +1475,7 @@ class cBIFF(cPluginParent):
1372 1475 0x231: 'FONT : Font Description',
1373 1476 0x236: 'TABLE : Data Table',
1374 1477 0x23E: 'WINDOW2 : Sheet Window Information',
  1478 + 0x27E: 'RK : Cell Value, RK Number',
1375 1479 0x293: 'STYLE : Style Information',
1376 1480 0x406: 'FORMULA : Cell Formula',
1377 1481 0x41E: 'FORMAT : Number Format',
... ... @@ -1470,12 +1574,18 @@ class cBIFF(cPluginParent):
1470 1574 oParser.add_option('-x', '--xlm', action='store_true', default=False, help='Select all records relevant for Excel 4.0 macros')
1471 1575 oParser.add_option('-o', '--opcode', type=str, default='', help='Opcode to filter for')
1472 1576 oParser.add_option('-f', '--find', type=str, default='', help='Content to search for')
  1577 + oParser.add_option('-c', '--csv', action='store_true', default=False, help='Produce CSV')
  1578 + oParser.add_option('-r', '--cellrefformat', type=str, default='rc', help='Cell reference format (RC, LN)')
1473 1579 (options, args) = oParser.parse_args(self.options.split(' '))
1474 1580  
1475 1581 if options.find.startswith('0x'):
1476 1582 options.find = binascii.a2b_hex(options.find[2:])
1477 1583  
1478 1584 position = 0
  1585 + macros4Found = False
  1586 + filepassFound = False
  1587 + dSheetNames = {}
  1588 + currentSheetname = ''
1479 1589 while position < len(stream):
1480 1590 formatcodes = 'HH'
1481 1591 formatsize = struct.calcsize(formatcodes)
... ... @@ -1483,6 +1593,7 @@ class cBIFF(cPluginParent):
1483 1593 break
1484 1594 opcode, length = struct.unpack(formatcodes, stream[position:position + formatsize])
1485 1595 data = stream[position + formatsize:position + formatsize + length]
  1596 + positionBIFFRecord = position
1486 1597 position = position + formatsize + length
1487 1598  
1488 1599 if opcode in dOpcodes:
... ... @@ -1491,16 +1602,21 @@ class cBIFF(cPluginParent):
1491 1602 opcodename = ''
1492 1603 line = '%04x %6d %s' % (opcode, length, opcodename)
1493 1604  
  1605 + csvrow = None
  1606 +
1494 1607 # FORMULA record
1495 1608 if opcode == 0x06 and len(data) >= 21:
1496   - formatcodes = 'HH'
1497   - formatsize = struct.calcsize(formatcodes)
1498   - row, column = struct.unpack(formatcodes, data[0:formatsize])
  1609 + cellref, dummy = ParseLoc(data, options.cellrefformat, True)
1499 1610 formatcodes = 'H'
1500 1611 formatsize = struct.calcsize(formatcodes)
1501 1612 length = struct.unpack(formatcodes, data[20:20 + formatsize])[0]
1502 1613 expression = data[22:]
1503   - line += ' - R%dC%d len=%d %s' % (row + 1, column + 1, length, ParseExpression(expression))
  1614 + parsedExpression, stack = ParseExpression(expression, options.cellrefformat)
  1615 + line += ' - %s len=%d %s' % (cellref, length, parsedExpression)
  1616 + if len(stack) == 1:
  1617 + csvrow = [currentSheetname, cellref, stack[0], '']
  1618 + else:
  1619 + csvrow = [currentSheetname, cellref, repr(stack), '']
1504 1620 if options.formulabytes:
1505 1621 data_hex = P23Decode(binascii.b2a_hex(data))
1506 1622 spaced_data_hex = ' '.join(a+b for a,b in zip(data_hex[::2], data_hex[1::2]))
... ... @@ -1508,17 +1624,22 @@ class cBIFF(cPluginParent):
1508 1624  
1509 1625 # FORMULA record #a# difference BIFF4 and BIFF5+
1510 1626 if opcode == 0x18 and len(data) >= 16:
1511   - if P23Ord(data[0]) & 0x20:
  1627 + flags = P23Ord(data[0])
  1628 + lnName = P23Ord(data[3])
  1629 + szFormula = P23Ord(data[4]) + P23Ord(data[5]) * 0x100
  1630 + offset = 14
  1631 + if P23Ord(data[offset]) == 0: #a# hack with BIFF8 Unicode
  1632 + offset = 15
  1633 + if flags & 0x20:
1512 1634 dBuildInNames = {1: 'Auto_Open', 2: 'Auto_Close'}
1513   - code = P23Ord(data[14])
1514   - if code == 0: #a# hack with BIFF8 Unicode
1515   - code = P23Ord(data[15])
  1635 + code = P23Ord(data[offset])
1516 1636 line += ' - build-in-name %d %s' % (code, dBuildInNames.get(code, '?'))
1517 1637 else:
1518   - offset = 14
1519   - if P23Ord(data[offset]) == 0:
1520   - offset = 15
1521   - line += ' - %s' % (P23Decode(data[offset:offset+P23Ord(data[3])]))
  1638 + line += ' - %s' % (P23Decode(data[offset:offset+lnName]))
  1639 + if flags & 0x01:
  1640 + line += ' hidden'
  1641 + parsedExpression, stack = ParseExpression(data[offset+lnName:offset+lnName+szFormula], options.cellrefformat)
  1642 + line += ' len=%d %s' % (szFormula, parsedExpression)
1522 1643  
1523 1644 # FILEPASS record
1524 1645 if opcode == 0x2f:
... ... @@ -1526,11 +1647,27 @@ class cBIFF(cPluginParent):
1526 1647  
1527 1648 # BOUNDSHEET record
1528 1649 if opcode == 0x85 and len(data) >= 6:
  1650 + formatcodes = '<IBB'
  1651 + formatsize = struct.calcsize(formatcodes)
  1652 + positionBOF, sheetState, sheetType = struct.unpack(formatcodes, data[0:formatsize])
1529 1653 dSheetType = {0: 'worksheet or dialog sheet', 1: 'Excel 4.0 macro sheet', 2: 'chart', 6: 'Visual Basic module'}
1530   - if P23Ord(data[5]) == 1:
  1654 + if sheetType == 1:
1531 1655 macros4Found = True
1532 1656 dSheetState = {0: 'visible', 1: 'hidden', 2: 'very hidden'}
1533   - line += ' - %s, %s' % (dSheetType.get(P23Ord(data[5]), '%02x' % P23Ord(data[5])), dSheetState.get(P23Ord(data[4]), '%02x' % P23Ord(data[4])))
  1657 + sheetName = ShortXLUnicodeString(data[6:])
  1658 + dSheetNames[positionBOF] = sheetName
  1659 + line += ' - %s, %s - %s' % (dSheetType.get(sheetType, '%02x' % sheetType), dSheetState.get(sheetState, '%02x' % sheetState), sheetName)
  1660 +
  1661 + # BOF record
  1662 + if opcode == 0x0809 and len(data) >= 4:
  1663 + formatcodes = 'H'
  1664 + formatsize = struct.calcsize(formatcodes)
  1665 + dt = struct.unpack(formatcodes, data[2:2 + formatsize])[0]
  1666 + dStreamType = {5: 'workbook', 0x10: 'dialog sheet/worksheet', 0x20: 'chart sheet', 0x40: 'macro sheet'}
  1667 + line += ' - %s' % (dStreamType.get(dt, '0x%04x' % dt))
  1668 + if positionBIFFRecord in dSheetNames:
  1669 + line += ' - %s' % (dSheetNames[positionBIFFRecord])
  1670 + currentSheetname = dSheetNames[positionBIFFRecord]
1534 1671  
1535 1672 # STRING record
1536 1673 if opcode == 0x207 and len(data) >= 4:
... ... @@ -1544,9 +1681,32 @@ class cBIFF(cPluginParent):
1544 1681 strings += b' '.join(values[1])
1545 1682 line += ' - %s' % strings
1546 1683  
  1684 + # number record
  1685 + if opcode == 0x0203:
  1686 + cellref, data2 = ParseLoc(data, options.cellrefformat, True)
  1687 + formatcodes = '<Hd'
  1688 + formatsize = struct.calcsize(formatcodes)
  1689 + xf, value = struct.unpack(formatcodes, data2[:formatsize])
  1690 + line += ' - %s %.20f' % (cellref, value)
  1691 + csvrow = [currentSheetname, cellref, '', '%.20f' % value]
  1692 +
  1693 + # RK record
  1694 + if opcode == 0x027E and len(data) == 10:
  1695 + cellref, data2 = ParseLoc(data, options.cellrefformat, True)
  1696 + formatcodes = '<H'
  1697 + formatsize = struct.calcsize(formatcodes)
  1698 + xf = struct.unpack(formatcodes, data2[:formatsize])
  1699 + value = DecodeRKValue(data2[formatsize:])
  1700 + line += ' - %s %f' % (cellref, value)
  1701 + csvrow = [currentSheetname, cellref, '', '%.20f' % value]
  1702 +
1547 1703 if options.find == '' and options.opcode == '' and not options.xlm or options.opcode != '' and options.opcode.lower() in line.lower() or options.find != '' and options.find in data or options.xlm and opcode in [0x06, 0x18, 0x85, 0x207]:
1548 1704 if not options.hex and not options.dump:
1549   - result.append(line)
  1705 + if options.csv:
  1706 + if csvrow != None:
  1707 + result.append(csvrow)
  1708 + else:
  1709 + result.append(line)
1550 1710  
1551 1711 if options.hexascii:
1552 1712 result.extend(' ' + foundstring for foundstring in HexASCII(data, 8))
... ... @@ -1565,6 +1725,8 @@ class cBIFF(cPluginParent):
1565 1725 result = ['FILEPASS record: file is password protected']
1566 1726 elif options.xlm and not macros4Found:
1567 1727 result = []
  1728 + elif options.csv:
  1729 + result = [MakeCSVLine(row, DEFAULT_SEPARATOR, QUOTE) for row in [['Sheet', 'Reference', 'Formula', 'Value']] + result]
1568 1730  
1569 1731 return result
1570 1732  
... ...