From 6d7514ce799d78c0b23c4d187554423ca78b3149 Mon Sep 17 00:00:00 2001 From: decalage2 Date: Mon, 13 Apr 2020 22:32:36 +0200 Subject: [PATCH] plugin_biff: updated to v0.0.11 (improved formula parsing) --- oletools/olevba.py | 2 +- oletools/thirdparty/oledump/oledump_extract.py | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++++ oletools/thirdparty/oledump/plugin_biff.py | 192 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------------------------------------- setup.py | 2 +- 4 files changed, 203 insertions(+), 46 deletions(-) create mode 100644 oletools/thirdparty/oledump/oledump_extract.py diff --git a/oletools/olevba.py b/oletools/olevba.py index 0fcc0b2..ce0d971 100644 --- a/oletools/olevba.py +++ b/oletools/olevba.py @@ -227,7 +227,7 @@ from __future__ import print_function # 2020-01-31 v0.56 KS: - added option --no-xlm, improved MHT detection # 2020-03-22 PL: - uses plugin_biff to display DCONN objects and their URL -__version__ = '0.56dev3' +__version__ = '0.56dev4' #------------------------------------------------------------------------------ # TODO: diff --git a/oletools/thirdparty/oledump/oledump_extract.py b/oletools/thirdparty/oledump/oledump_extract.py new file mode 100644 index 0000000..407d411 --- /dev/null +++ b/oletools/thirdparty/oledump/oledump_extract.py @@ -0,0 +1,53 @@ +#!/usr/bin/env python + +# Small extract of oledump.py to be able to run plugin_biff from olevba + +__description__ = 'Analyze OLE files (Compound Binary Files)' +__author__ = 'Didier Stevens' +__version__ = '0.0.49' +__date__ = '2020/03/28' + +""" + +Source code put in public domain by Didier Stevens, no Copyright +https://DidierStevens.com +Use at your own risk +""" + +class cPluginParent(): + macroOnly = False + indexQuiet = False + +plugins = [] + +def AddPlugin(cClass): + global plugins + + plugins.append(cClass) + + +# CIC: Call If Callable +def CIC(expression): + if callable(expression): + return expression() + else: + return expression + +# IFF: IF Function +def IFF(expression, valueTrue, valueFalse): + if expression: + return CIC(valueTrue) + else: + return CIC(valueFalse) + +def P23Ord(value): + if type(value) == int: + return value + else: + return ord(value) + +def P23Chr(value): + if type(value) == int: + return chr(value) + else: + return value diff --git a/oletools/thirdparty/oledump/plugin_biff.py b/oletools/thirdparty/oledump/plugin_biff.py index 69d8222..e9dae06 100644 --- a/oletools/thirdparty/oledump/plugin_biff.py +++ b/oletools/thirdparty/oledump/plugin_biff.py @@ -2,8 +2,8 @@ __description__ = 'BIFF plugin for oledump.py' __author__ = 'Didier Stevens' -__version__ = '0.0.9' -__date__ = '2020/03/09' +__version__ = '0.0.11' +__date__ = '2020/04/06' # Slightly modified version by Philippe Lagadec to be imported into olevba @@ -28,6 +28,14 @@ History: 2020/02/23: 0.0.7 performance improvement 2020/03/08: 0.0.8 added options -X and -d 2020/03/09: 0.0.9 improved formula parsing; Python 3 bugfixes + 2020/03/27: 0.0.10 improved formula parsing and debug modes. (by @JohnLaTwC) + 05219f8c047f1dff861634c4b50d4f6978c87c35f4c14d21ee9d757cac9280cf (ptgConcat) + 94b26003699efba54ced98006379a230d1154f340589cc89af7d0cbedb861a53 (encoding, ptgFuncVarA, ptgNameX) + d3c1627ca2775d98717eb1abf2b70aedf383845d87993c6b924f2f55d9d4d696 (ptgArea) + 01761b06c24baa818b0a75059e745871246a5e9c6ce0243ad96e8632342cbb59 (ptgFuncVarA) + d3c1627ca2775d98717eb1abf2b70aedf383845d87993c6b924f2f55d9d4d696 (ptgFunc) + 1d48a42a0b06a087e966b860c8f293a9bf57da8d70f5f83c61242afc5b81eb4f (=SELECT($B$1:$1000:$1000:$B:$B,$B$1)) + 2020/04/06: 0.0.11 Python 2 bugfixes; password protect record FILEPASS Todo: """ @@ -35,44 +43,19 @@ Todo: import struct import re import optparse + +# Modifications for olevba: import sys import binascii - -# A few functions backported from oledump.py: - -class cPluginParent(): - macroOnly = False - indexQuiet = False - -# CIC: Call If Callable -def CIC(expression): - if callable(expression): - return expression() - else: - return expression - -# IFF: IF Function -def IFF(expression, valueTrue, valueFalse): - if expression: - return CIC(valueTrue) - else: - return CIC(valueFalse) - -def P23Ord(value): - if type(value) == int: - return value - else: - return ord(value) - -def P23Chr(value): - if type(value) == int: - return chr(value) - else: - return value +from .oledump_extract import * +# end modifications def P23Decode(value): if sys.version_info[0] > 2: - return value.decode() + try: + return value.decode('utf-8') + except UnicodeDecodeError as u: + return value.decode('windows-1252') else: return value @@ -115,6 +98,67 @@ def Strings(data, encodings='sL'): def ContainsWP23Ord(word, expression): return struct.pack('=256: + return 'R%s%d' % (row1indicator, row1) + if col1 == col2 and row2 >= 65536: + return 'C%s%d' % (col1indicator, col1) + + return 'R%s%dC%s%d' % (row1indicator, row1, col1indicator, col1) + +# https://docs.microsoft.com/en-us/openspecs/office_file_formats/ms-xls/6e5eed10-5b77-43d6-8dd0-37345f8654ad +def ParseLocRelU(expression): + row = P23Ord(expression[0]) + P23Ord(expression[1]) * 0x100 + column = P23Ord(expression[2]) + P23Ord(expression[3]) * 0x100 + rowRelative = False #P23Ord(expression[3]) & 0x0001 + colRelative = False #P23Ord(expression[3]) & 0x0002 + #column = column & 0xFFFC + if rowRelative: + rowindicator = '~' + else: + rowindicator = '' + row += 1 + if colRelative: + colindicator = '~' + else: + colindicator = '' + column += 1 + return 'R%s%dC%s%d' % (rowindicator, row, colindicator, column) + #https://docs.microsoft.com/en-us/openspecs/office_file_formats/ms-xls/6e5eed10-5b77-43d6-8dd0-37345f8654ad def ParseLoc(expression): formatcodes = 'HH' @@ -160,6 +204,7 @@ def ParseExpression(expression): 0x15: 'ptgParen', 0x16: 'ptgMissArg', 0x17: 'ptgStr', +0x18: 'ptgExtend', 0x19: 'ptgAttr', 0x1A: 'ptgSheet', 0x1B: 'ptgEndSheet', @@ -1017,15 +1062,38 @@ def ParseExpression(expression): result += dTokens[ptgid] + ' ' if ptgid == 0x03: # ptgAdd https://docs.microsoft.com/en-us/openspecs/office_file_formats/ms-xls/27db2f45-11e8-4238-94ed-92fd9c5721fb pass + elif ptgid == 0x4: # ptgSub + pass + elif ptgid == 0x5: # ptgMul + pass + elif ptgid == 0x6: # ptgDiv + pass + elif ptgid == 0x8: # ptgConcat + pass elif ptgid == 0x09: # ptgLt https://docs.microsoft.com/en-us/openspecs/office_file_formats/ms-xls/28de4981-1352-4a5e-a3b7-f15a8a6ce7fb pass + elif ptgid == 0x0A: # ptgLE + pass + elif ptgid == 0x0B: # ptgEQ + pass + elif ptgid == 0x0C: # ptgGE + pass + elif ptgid == 0x0D: # ptgGT + pass + elif ptgid == 0x0E: # ptgNE + pass elif ptgid == 0x17: # ptgStr https://docs.microsoft.com/en-us/openspecs/office_file_formats/ms-xls/87c2a057-705c-4473-a168-6d5fac4a9eba length = P23Ord(expression[0]) expression = expression[1:] if P23Ord(expression[0]) == 0: # probably BIFF8 -> UNICODE (compressed) expression = expression[1:] - result += '"%s" ' % P23Decode(expression[:length]) - expression = expression[length:] + result += '"%s" ' % P23Decode(expression[:length]) + expression = expression[length:] + elif P23Ord(expression[0]) == 1: # if 1, then double byte chars + # doublebyte check: https://docs.microsoft.com/en-us/openspecs/office_file_formats/ms-xls/05162858-0ca9-44cb-bb07-a720928f63f8 + expression = expression[1:] + result += '"%s" ' % P23Decode(expression[:length*2]) + expression = expression[length*2:] elif ptgid == 0x19: grbit = P23Ord(expression[0]) expression = expression[1:] @@ -1046,10 +1114,12 @@ def ParseExpression(expression): functionid = P23Ord(expression[0]) + P23Ord(expression[1]) * 0x100 result += '%s (0x%04x) ' % (dFunctions.get(functionid, '*UNKNOWN FUNCTION*'), functionid) expression = expression[2:] - elif ptgid == 0x22 or ptgid == 0x42: + elif ptgid == 0x22 or ptgid == 0x42 or ptgid == 0x62: functionid = P23Ord(expression[1]) + P23Ord(expression[2]) * 0x100 result += 'args %d func %s (0x%04x) ' % (P23Ord(expression[0]), dFunctions.get(functionid, '*UNKNOWN FUNCTION*'), functionid) expression = expression[3:] + if functionid == 0x806D: + expression = expression[9:] elif ptgid == 0x23: # ptgName https://docs.microsoft.com/en-us/openspecs/office_file_formats/ms-xls/5f05c166-dfe3-4bbf-85aa-31c09c0258c0 result += '0x%08x ' % (struct.unpack('= 16: @@ -1420,6 +1518,10 @@ class cBIFF(cPluginParent): offset = 15 line += ' - %s' % (P23Decode(data[offset:offset+P23Ord(data[3])])) + # FILEPASS record + if opcode == 0x2f: + filepassFound = True + # BOUNDSHEET record if opcode == 0x85 and len(data) >= 6: dSheetType = {0: 'worksheet or dialog sheet', 1: 'Excel 4.0 macro sheet', 2: 'chart', 6: 'Visual Basic module'} @@ -1433,7 +1535,7 @@ class cBIFF(cPluginParent): values = list(Strings(data[3:]).values()) strings = b'' if values[0] != []: - strings += b' '.join(values[0]) + strings = values[0][0].encode() if values[1] != []: if strings != b'': strings += b' ' @@ -1457,9 +1559,11 @@ class cBIFF(cPluginParent): elif options.dump: result = data - if options.xlm and not macros4Found: + if options.xlm and filepassFound: + result = ['FILEPASS record: file is password protected'] + elif options.xlm and not macros4Found: result = [] return result -#AddPlugin(cBIFF) +AddPlugin(cBIFF) diff --git a/setup.py b/setup.py index 09ed357..4867ff2 100644 --- a/setup.py +++ b/setup.py @@ -52,7 +52,7 @@ import os, fnmatch #--- METADATA ----------------------------------------------------------------- name = "oletools" -version = '0.56dev3' +version = '0.56dev4' desc = "Python tools to analyze security characteristics of MS Office and OLE files (also called Structured Storage, Compound File Binary Format or Compound Document File Format), for Malware Analysis and Incident Response #DFIR" long_desc = open('oletools/README.rst').read() author = "Philippe Lagadec" -- libgit2 0.21.4