Commit 6d7514ce799d78c0b23c4d187554423ca78b3149
1 parent: 8b97e45b
plugin_biff: updated to v0.0.11 (improved formula parsing)
Showing 4 changed files with 203 additions and 46 deletions
oletools/olevba.py
| ... | ... | @@ -227,7 +227,7 @@ from __future__ import print_function |
| 227 | 227 | # 2020-01-31 v0.56 KS: - added option --no-xlm, improved MHT detection |
| 228 | 228 | # 2020-03-22 PL: - uses plugin_biff to display DCONN objects and their URL |
| 229 | 229 | |
| 230 | -__version__ = '0.56dev3' | |
| 230 | +__version__ = '0.56dev4' | |
| 231 | 231 | |
| 232 | 232 | #------------------------------------------------------------------------------ |
| 233 | 233 | # TODO: | ... | ... |
oletools/thirdparty/oledump/oledump_extract.py
new file (mode 0 → 100644)
| 1 | +#!/usr/bin/env python | |
| 2 | + | |
| 3 | +# Small extract of oledump.py to be able to run plugin_biff from olevba | |
| 4 | + | |
| 5 | +__description__ = 'Analyze OLE files (Compound Binary Files)' | |
| 6 | +__author__ = 'Didier Stevens' | |
| 7 | +__version__ = '0.0.49' | |
| 8 | +__date__ = '2020/03/28' | |
| 9 | + | |
| 10 | +""" | |
| 11 | + | |
| 12 | +Source code put in public domain by Didier Stevens, no Copyright | |
| 13 | +https://DidierStevens.com | |
| 14 | +Use at your own risk | |
| 15 | +""" | |
| 16 | + | |
| 17 | +class cPluginParent(): | |
| 18 | + macroOnly = False | |
| 19 | + indexQuiet = False | |
| 20 | + | |
| 21 | +plugins = [] | |
| 22 | + | |
| 23 | +def AddPlugin(cClass): | |
| 24 | + global plugins | |
| 25 | + | |
| 26 | + plugins.append(cClass) | |
| 27 | + | |
| 28 | + | |
| 29 | +# CIC: Call If Callable | |
| 30 | +def CIC(expression): | |
| 31 | + if callable(expression): | |
| 32 | + return expression() | |
| 33 | + else: | |
| 34 | + return expression | |
| 35 | + | |
| 36 | +# IFF: IF Function | |
| 37 | +def IFF(expression, valueTrue, valueFalse): | |
| 38 | + if expression: | |
| 39 | + return CIC(valueTrue) | |
| 40 | + else: | |
| 41 | + return CIC(valueFalse) | |
| 42 | + | |
| 43 | +def P23Ord(value): | |
| 44 | + if type(value) == int: | |
| 45 | + return value | |
| 46 | + else: | |
| 47 | + return ord(value) | |
| 48 | + | |
| 49 | +def P23Chr(value): | |
| 50 | + if type(value) == int: | |
| 51 | + return chr(value) | |
| 52 | + else: | |
| 53 | + return value | ... | ... |
oletools/thirdparty/oledump/plugin_biff.py
| ... | ... | @@ -2,8 +2,8 @@ |
| 2 | 2 | |
| 3 | 3 | __description__ = 'BIFF plugin for oledump.py' |
| 4 | 4 | __author__ = 'Didier Stevens' |
| 5 | -__version__ = '0.0.9' | |
| 6 | -__date__ = '2020/03/09' | |
| 5 | +__version__ = '0.0.11' | |
| 6 | +__date__ = '2020/04/06' | |
| 7 | 7 | |
| 8 | 8 | # Slightly modified version by Philippe Lagadec to be imported into olevba |
| 9 | 9 | |
| ... | ... | @@ -28,6 +28,14 @@ History: |
| 28 | 28 | 2020/02/23: 0.0.7 performance improvement |
| 29 | 29 | 2020/03/08: 0.0.8 added options -X and -d |
| 30 | 30 | 2020/03/09: 0.0.9 improved formula parsing; Python 3 bugfixes |
| 31 | + 2020/03/27: 0.0.10 improved formula parsing and debug modes. (by @JohnLaTwC) | |
| 32 | + 05219f8c047f1dff861634c4b50d4f6978c87c35f4c14d21ee9d757cac9280cf (ptgConcat) | |
| 33 | + 94b26003699efba54ced98006379a230d1154f340589cc89af7d0cbedb861a53 (encoding, ptgFuncVarA, ptgNameX) | |
| 34 | + d3c1627ca2775d98717eb1abf2b70aedf383845d87993c6b924f2f55d9d4d696 (ptgArea) | |
| 35 | + 01761b06c24baa818b0a75059e745871246a5e9c6ce0243ad96e8632342cbb59 (ptgFuncVarA) | |
| 36 | + d3c1627ca2775d98717eb1abf2b70aedf383845d87993c6b924f2f55d9d4d696 (ptgFunc) | |
| 37 | + 1d48a42a0b06a087e966b860c8f293a9bf57da8d70f5f83c61242afc5b81eb4f (=SELECT($B$1:$1000:$1000:$B:$B,$B$1)) | |
| 38 | + 2020/04/06: 0.0.11 Python 2 bugfixes; password protect record FILEPASS | |
| 31 | 39 | |
| 32 | 40 | Todo: |
| 33 | 41 | """ |
| ... | ... | @@ -35,44 +43,19 @@ Todo: |
| 35 | 43 | import struct |
| 36 | 44 | import re |
| 37 | 45 | import optparse |
| 46 | + | |
| 47 | +# Modifications for olevba: | |
| 38 | 48 | import sys |
| 39 | 49 | import binascii |
| 40 | - | |
| 41 | -# A few functions backported from oledump.py: | |
| 42 | - | |
| 43 | -class cPluginParent(): | |
| 44 | - macroOnly = False | |
| 45 | - indexQuiet = False | |
| 46 | - | |
| 47 | -# CIC: Call If Callable | |
| 48 | -def CIC(expression): | |
| 49 | - if callable(expression): | |
| 50 | - return expression() | |
| 51 | - else: | |
| 52 | - return expression | |
| 53 | - | |
| 54 | -# IFF: IF Function | |
| 55 | -def IFF(expression, valueTrue, valueFalse): | |
| 56 | - if expression: | |
| 57 | - return CIC(valueTrue) | |
| 58 | - else: | |
| 59 | - return CIC(valueFalse) | |
| 60 | - | |
| 61 | -def P23Ord(value): | |
| 62 | - if type(value) == int: | |
| 63 | - return value | |
| 64 | - else: | |
| 65 | - return ord(value) | |
| 66 | - | |
| 67 | -def P23Chr(value): | |
| 68 | - if type(value) == int: | |
| 69 | - return chr(value) | |
| 70 | - else: | |
| 71 | - return value | |
| 50 | +from .oledump_extract import * | |
| 51 | +# end modifications | |
| 72 | 52 | |
| 73 | 53 | def P23Decode(value): |
| 74 | 54 | if sys.version_info[0] > 2: |
| 75 | - return value.decode() | |
| 55 | + try: | |
| 56 | + return value.decode('utf-8') | |
| 57 | + except UnicodeDecodeError as u: | |
| 58 | + return value.decode('windows-1252') | |
| 76 | 59 | else: |
| 77 | 60 | return value |
| 78 | 61 | |
| ... | ... | @@ -115,6 +98,67 @@ def Strings(data, encodings='sL'): |
| 115 | 98 | def ContainsWP23Ord(word, expression): |
| 116 | 99 | return struct.pack('<H', word) in expression |
| 117 | 100 | |
| 101 | +# https://docs.microsoft.com/en-us/openspecs/office_file_formats/ms-xls/75afd109-b1ce-4511-b56f-2d63116f6647 | |
| 102 | +def ParseArea(expression): | |
| 103 | + formatcodes = 'HHHH' | |
| 104 | + formatsize = struct.calcsize(formatcodes) | |
| 105 | + if len(expression) < formatsize: | |
| 106 | + return '*ERROR*' | |
| 107 | + row1,row2,col1,col2 = struct.unpack(formatcodes, expression[0:formatsize]) | |
| 108 | + row1Relative = col1 & 0x8000 | |
| 109 | + col1Relative = col1 & 0x4000 | |
| 110 | + row2Relative = col2 & 0x8000 | |
| 111 | + col2Relative = col2 & 0x4000 | |
| 112 | + col1 = col1 & 0x3FFF | |
| 113 | + col2 = col2 & 0x3FFF | |
| 114 | + | |
| 115 | + if row1Relative: | |
| 116 | + row1indicator = '~' | |
| 117 | + else: | |
| 118 | + row1indicator = '' | |
| 119 | + row1 += 1 | |
| 120 | + if col1Relative: | |
| 121 | + col1indicator = '~' | |
| 122 | + else: | |
| 123 | + col1indicator = '' | |
| 124 | + col1 += 1 | |
| 125 | + if row2Relative: | |
| 126 | + row2indicator = '~' | |
| 127 | + else: | |
| 128 | + row2indicator = '' | |
| 129 | + row2 += 1 | |
| 130 | + if col2Relative: | |
| 131 | + col2indicator = '~' | |
| 132 | + else: | |
| 133 | + col2indicator = '' | |
| 134 | + col2 += 1 | |
| 135 | + | |
| 136 | + if row1 == row2 and col2 >=256: | |
| 137 | + return 'R%s%d' % (row1indicator, row1) | |
| 138 | + if col1 == col2 and row2 >= 65536: | |
| 139 | + return 'C%s%d' % (col1indicator, col1) | |
| 140 | + | |
| 141 | + return 'R%s%dC%s%d' % (row1indicator, row1, col1indicator, col1) | |
| 142 | + | |
| 143 | +# https://docs.microsoft.com/en-us/openspecs/office_file_formats/ms-xls/6e5eed10-5b77-43d6-8dd0-37345f8654ad | |
| 144 | +def ParseLocRelU(expression): | |
| 145 | + row = P23Ord(expression[0]) + P23Ord(expression[1]) * 0x100 | |
| 146 | + column = P23Ord(expression[2]) + P23Ord(expression[3]) * 0x100 | |
| 147 | + rowRelative = False #P23Ord(expression[3]) & 0x0001 | |
| 148 | + colRelative = False #P23Ord(expression[3]) & 0x0002 | |
| 149 | + #column = column & 0xFFFC | |
| 150 | + if rowRelative: | |
| 151 | + rowindicator = '~' | |
| 152 | + else: | |
| 153 | + rowindicator = '' | |
| 154 | + row += 1 | |
| 155 | + if colRelative: | |
| 156 | + colindicator = '~' | |
| 157 | + else: | |
| 158 | + colindicator = '' | |
| 159 | + column += 1 | |
| 160 | + return 'R%s%dC%s%d' % (rowindicator, row, colindicator, column) | |
| 161 | + | |
| 118 | 162 | #https://docs.microsoft.com/en-us/openspecs/office_file_formats/ms-xls/6e5eed10-5b77-43d6-8dd0-37345f8654ad |
| 119 | 163 | def ParseLoc(expression): |
| 120 | 164 | formatcodes = 'HH' |
| ... | ... | @@ -160,6 +204,7 @@ def ParseExpression(expression): |
| 160 | 204 | 0x15: 'ptgParen', |
| 161 | 205 | 0x16: 'ptgMissArg', |
| 162 | 206 | 0x17: 'ptgStr', |
| 207 | +0x18: 'ptgExtend', | |
| 163 | 208 | 0x19: 'ptgAttr', |
| 164 | 209 | 0x1A: 'ptgSheet', |
| 165 | 210 | 0x1B: 'ptgEndSheet', |
| ... | ... | @@ -1017,15 +1062,38 @@ def ParseExpression(expression): |
| 1017 | 1062 | result += dTokens[ptgid] + ' ' |
| 1018 | 1063 | if ptgid == 0x03: # ptgAdd https://docs.microsoft.com/en-us/openspecs/office_file_formats/ms-xls/27db2f45-11e8-4238-94ed-92fd9c5721fb |
| 1019 | 1064 | pass |
| 1065 | + elif ptgid == 0x4: # ptgSub | |
| 1066 | + pass | |
| 1067 | + elif ptgid == 0x5: # ptgMul | |
| 1068 | + pass | |
| 1069 | + elif ptgid == 0x6: # ptgDiv | |
| 1070 | + pass | |
| 1071 | + elif ptgid == 0x8: # ptgConcat | |
| 1072 | + pass | |
| 1020 | 1073 | elif ptgid == 0x09: # ptgLt https://docs.microsoft.com/en-us/openspecs/office_file_formats/ms-xls/28de4981-1352-4a5e-a3b7-f15a8a6ce7fb |
| 1021 | 1074 | pass |
| 1075 | + elif ptgid == 0x0A: # ptgLE | |
| 1076 | + pass | |
| 1077 | + elif ptgid == 0x0B: # ptgEQ | |
| 1078 | + pass | |
| 1079 | + elif ptgid == 0x0C: # ptgGE | |
| 1080 | + pass | |
| 1081 | + elif ptgid == 0x0D: # ptgGT | |
| 1082 | + pass | |
| 1083 | + elif ptgid == 0x0E: # ptgNE | |
| 1084 | + pass | |
| 1022 | 1085 | elif ptgid == 0x17: # ptgStr https://docs.microsoft.com/en-us/openspecs/office_file_formats/ms-xls/87c2a057-705c-4473-a168-6d5fac4a9eba |
| 1023 | 1086 | length = P23Ord(expression[0]) |
| 1024 | 1087 | expression = expression[1:] |
| 1025 | 1088 | if P23Ord(expression[0]) == 0: # probably BIFF8 -> UNICODE (compressed) |
| 1026 | 1089 | expression = expression[1:] |
| 1027 | - result += '"%s" ' % P23Decode(expression[:length]) | |
| 1028 | - expression = expression[length:] | |
| 1090 | + result += '"%s" ' % P23Decode(expression[:length]) | |
| 1091 | + expression = expression[length:] | |
| 1092 | + elif P23Ord(expression[0]) == 1: # if 1, then double byte chars | |
| 1093 | + # doublebyte check: https://docs.microsoft.com/en-us/openspecs/office_file_formats/ms-xls/05162858-0ca9-44cb-bb07-a720928f63f8 | |
| 1094 | + expression = expression[1:] | |
| 1095 | + result += '"%s" ' % P23Decode(expression[:length*2]) | |
| 1096 | + expression = expression[length*2:] | |
| 1029 | 1097 | elif ptgid == 0x19: |
| 1030 | 1098 | grbit = P23Ord(expression[0]) |
| 1031 | 1099 | expression = expression[1:] |
| ... | ... | @@ -1046,10 +1114,12 @@ def ParseExpression(expression): |
| 1046 | 1114 | functionid = P23Ord(expression[0]) + P23Ord(expression[1]) * 0x100 |
| 1047 | 1115 | result += '%s (0x%04x) ' % (dFunctions.get(functionid, '*UNKNOWN FUNCTION*'), functionid) |
| 1048 | 1116 | expression = expression[2:] |
| 1049 | - elif ptgid == 0x22 or ptgid == 0x42: | |
| 1117 | + elif ptgid == 0x22 or ptgid == 0x42 or ptgid == 0x62: | |
| 1050 | 1118 | functionid = P23Ord(expression[1]) + P23Ord(expression[2]) * 0x100 |
| 1051 | 1119 | result += 'args %d func %s (0x%04x) ' % (P23Ord(expression[0]), dFunctions.get(functionid, '*UNKNOWN FUNCTION*'), functionid) |
| 1052 | 1120 | expression = expression[3:] |
| 1121 | + if functionid == 0x806D: | |
| 1122 | + expression = expression[9:] | |
| 1053 | 1123 | elif ptgid == 0x23: # ptgName https://docs.microsoft.com/en-us/openspecs/office_file_formats/ms-xls/5f05c166-dfe3-4bbf-85aa-31c09c0258c0 |
| 1054 | 1124 | result += '0x%08x ' % (struct.unpack('<I', expression[0:4])) |
| 1055 | 1125 | expression = expression[4:] |
| ... | ... | @@ -1057,8 +1127,9 @@ def ParseExpression(expression): |
| 1057 | 1127 | result += 'FLOAT ' |
| 1058 | 1128 | expression = expression[8:] |
| 1059 | 1129 | elif ptgid == 0x26: |
| 1060 | - expression = expression[4:] | |
| 1061 | - expression = expression[P23Ord(expression[0]) + P23Ord(expression[1]) * 0x100:] | |
| 1130 | + ## expression = expression[4:] | |
| 1131 | + ## expression = expression[P23Ord(expression[0]) + P23Ord(expression[1]) * 0x100:] | |
| 1132 | + expression = expression[6:] | |
| 1062 | 1133 | result += 'REFERENCE-EXPRESSION ' |
| 1063 | 1134 | elif ptgid == 0x01: |
| 1064 | 1135 | formatcodes = 'HH' |
| ... | ... | @@ -1067,11 +1138,32 @@ def ParseExpression(expression): |
| 1067 | 1138 | expression = expression[formatsize:] |
| 1068 | 1139 | result += 'R%dC%d ' % (row + 1, column + 1) |
| 1069 | 1140 | elif ptgid == 0x24 or ptgid == 0x44: |
| 1070 | - result += '%s ' % ParseLoc(expression) | |
| 1141 | + result += '%s ' % ParseLocRelU(expression) | |
| 1071 | 1142 | expression = expression[4:] |
| 1143 | + elif ptgid == 0x11: # ptgRange | |
| 1144 | + pass | |
| 1145 | + elif ptgid == 0x25: # ptgArea | |
| 1146 | + result += '%s ' % ParseArea(expression[0:8]) | |
| 1147 | + expression = expression[8:] | |
| 1072 | 1148 | elif ptgid == 0x3A or ptgid == 0x5A: |
| 1073 | 1149 | result += '%s ' % ParseLoc(expression[2:]) |
| 1074 | 1150 | expression = expression[6:] |
| 1151 | + elif ptgid == 0x39: # PtgNameX | |
| 1152 | + expression = expression[2:] | |
| 1153 | + formatcodes = 'H' | |
| 1154 | + formatsize = struct.calcsize(formatcodes) | |
| 1155 | + nameindex = struct.unpack(formatcodes, expression[0:formatsize])[0] | |
| 1156 | + result += ' NAMEIDX %d ' % nameindex | |
| 1157 | + expression = expression[4:] | |
| 1158 | + elif ptgid == 0x21: #ptgFunc | |
| 1159 | + functionid = P23Ord(expression[0]) + P23Ord(expression[1]) * 0x100 | |
| 1160 | + result += '%s ' % dFunctions.get(functionid, '*UNKNOWN FUNCTION*') | |
| 1161 | + expression = expression[2:] | |
| 1162 | + elif ptgid == 0x61 or ptgid == 0x62: # ptgFuncVar ptgFuncVarA | |
| 1163 | + params_count = P23Ord(expression[0]) | |
| 1164 | + functionid = P23Ord(expression[1]) + P23Ord(expression[2]) * 0x100 | |
| 1165 | + result += '%s ' % dFunctions.get(functionid, '*UNKNOWN FUNCTION*') | |
| 1166 | + expression = expression[(2+params_count):] | |
| 1075 | 1167 | else: |
| 1076 | 1168 | break |
| 1077 | 1169 | else: |
| ... | ... | @@ -1101,6 +1193,7 @@ class cBIFF(cPluginParent): |
| 1101 | 1193 | def Analyze(self): |
| 1102 | 1194 | result = [] |
| 1103 | 1195 | macros4Found = False |
| 1196 | + filepassFound = False | |
| 1104 | 1197 | dOpcodes = { |
| 1105 | 1198 | 0x06: 'FORMULA : Cell Formula', |
| 1106 | 1199 | 0x0A: 'EOF : End of File', |
| ... | ... | @@ -1370,6 +1463,7 @@ class cBIFF(cPluginParent): |
| 1370 | 1463 | oParser.add_option('-s', '--strings', action='store_true', default=False, help='Dump strings') |
| 1371 | 1464 | oParser.add_option('-a', '--hexascii', action='store_true', default=False, help='Dump hex ascii') |
| 1372 | 1465 | oParser.add_option('-X', '--hex', action='store_true', default=False, help='Dump hex without whitespace') |
| 1466 | + oParser.add_option('-b', '--formulabytes', action='store_true', default=False, help='Dump formula bytes') | |
| 1373 | 1467 | oParser.add_option('-d', '--dump', action='store_true', default=False, help='Dump') |
| 1374 | 1468 | oParser.add_option('-x', '--xlm', action='store_true', default=False, help='Select all records relevant for Excel 4.0 macros') |
| 1375 | 1469 | oParser.add_option('-o', '--opcode', type=str, default='', help='Opcode to filter for') |
| ... | ... | @@ -1405,6 +1499,10 @@ class cBIFF(cPluginParent): |
| 1405 | 1499 | length = struct.unpack(formatcodes, data[20:20 + formatsize])[0] |
| 1406 | 1500 | expression = data[22:] |
| 1407 | 1501 | line += ' - R%dC%d len=%d %s' % (row + 1, column + 1, length, ParseExpression(expression)) |
| 1502 | + if options.formulabytes: | |
| 1503 | + data_hex = P23Decode(binascii.b2a_hex(data)) | |
| 1504 | + spaced_data_hex = ' '.join(a+b for a,b in zip(data_hex[::2], data_hex[1::2])) | |
| 1505 | + line += '\nFORMULA BYTES: %s' % spaced_data_hex | |
| 1408 | 1506 | |
| 1409 | 1507 | # FORMULA record #a# difference BIFF4 and BIFF5+ |
| 1410 | 1508 | if opcode == 0x18 and len(data) >= 16: |
| ... | ... | @@ -1420,6 +1518,10 @@ class cBIFF(cPluginParent): |
| 1420 | 1518 | offset = 15 |
| 1421 | 1519 | line += ' - %s' % (P23Decode(data[offset:offset+P23Ord(data[3])])) |
| 1422 | 1520 | |
| 1521 | + # FILEPASS record | |
| 1522 | + if opcode == 0x2f: | |
| 1523 | + filepassFound = True | |
| 1524 | + | |
| 1423 | 1525 | # BOUNDSHEET record |
| 1424 | 1526 | if opcode == 0x85 and len(data) >= 6: |
| 1425 | 1527 | dSheetType = {0: 'worksheet or dialog sheet', 1: 'Excel 4.0 macro sheet', 2: 'chart', 6: 'Visual Basic module'} |
| ... | ... | @@ -1433,7 +1535,7 @@ class cBIFF(cPluginParent): |
| 1433 | 1535 | values = list(Strings(data[3:]).values()) |
| 1434 | 1536 | strings = b'' |
| 1435 | 1537 | if values[0] != []: |
| 1436 | - strings += b' '.join(values[0]) | |
| 1538 | + strings = values[0][0].encode() | |
| 1437 | 1539 | if values[1] != []: |
| 1438 | 1540 | if strings != b'': |
| 1439 | 1541 | strings += b' ' |
| ... | ... | @@ -1457,9 +1559,11 @@ class cBIFF(cPluginParent): |
| 1457 | 1559 | elif options.dump: |
| 1458 | 1560 | result = data |
| 1459 | 1561 | |
| 1460 | - if options.xlm and not macros4Found: | |
| 1562 | + if options.xlm and filepassFound: | |
| 1563 | + result = ['FILEPASS record: file is password protected'] | |
| 1564 | + elif options.xlm and not macros4Found: | |
| 1461 | 1565 | result = [] |
| 1462 | 1566 | |
| 1463 | 1567 | return result |
| 1464 | 1568 | |
| 1465 | -#AddPlugin(cBIFF) | |
| 1569 | +AddPlugin(cBIFF) | ... | ... |
setup.py
| ... | ... | @@ -52,7 +52,7 @@ import os, fnmatch |
| 52 | 52 | #--- METADATA ----------------------------------------------------------------- |
| 53 | 53 | |
| 54 | 54 | name = "oletools" |
| 55 | -version = '0.56dev3' | |
| 55 | +version = '0.56dev4' | |
| 56 | 56 | desc = "Python tools to analyze security characteristics of MS Office and OLE files (also called Structured Storage, Compound File Binary Format or Compound Document File Format), for Malware Analysis and Incident Response #DFIR" |
| 57 | 57 | long_desc = open('oletools/README.rst').read() |
| 58 | 58 | author = "Philippe Lagadec" | ... | ... |