From 9201fe43e62b4496442eeeda518bd4c64cbef936 Mon Sep 17 00:00:00 2001
From: decalage2
Date: Mon, 30 Apr 2018 07:27:53 +0200
Subject: [PATCH] rtfobj: handle the "\'" obfuscation trick - issue #281

---
NOTE(review): this copy of the patch was recovered from a whitespace-collapsed
paste. Line breaks and Python indentation were restored, but runs of spaces
inside context lines (changelog entries, assignments in setup.py) could not be
recovered: verify them against the target files, or apply with
`git apply --ignore-space-change`. The size of the second rtfobj.py hunk and
the diffstat were updated to match the revised control_symbol method; the blob
ids on the `index` lines are those of the original patch.

 oletools/rtfobj.py | 34 +++++++++++++++++++++++++++++++++-
 setup.py           |  2 +-
 2 files changed, 34 insertions(+), 2 deletions(-)

diff --git a/oletools/rtfobj.py b/oletools/rtfobj.py
index fe28aa0..5a7b2bb 100644
--- a/oletools/rtfobj.py
+++ b/oletools/rtfobj.py
@@ -82,8 +82,9 @@ http://www.decalage.info/python/oletools
 # 2018-04-09 PL: - fixed issue #280: OLE Package were not detected on Python 3
 # 2018-03-24 v0.53 PL: - fixed issue #292: \margSz is a destination
 # 2018-04-27 PL: - extract and display the CLSID of OLE objects
+# 2018-04-30 PL: - handle "\'" obfuscation trick - issue #281
 
-__version__ = '0.53dev7'
+__version__ = '0.53dev8'
 
 # ------------------------------------------------------------------------------
 # TODO:
@@ -706,6 +707,37 @@ class RtfObjParser(RtfParser):
         # log.debug('- Control word "%s", param=%s, level=%d' % (cword, param, self.group_level))
         pass
 
+    def control_symbol(self, matchobject):
+        """
+        Handle a control symbol: only the backslash-quote hex escape is
+        processed, any other control symbol is ignored.
+
+        :param matchobject: regex match object for the control symbol; its
+            second character identifies the symbol
+        """
+        # log.debug('control symbol %r at index %Xh' % (matchobject.group(), self.index))
+        symbol = matchobject.group()[1:2]
+        # Compare against both bytes and str: on Python 3, a symbol sliced from
+        # bytes data is b"'", which never equals the str "'"
+        if symbol in (b"'", "'"):
+            # the two characters following "\'" can be any characters, not just hex digits
+            # (because within an objdata destination, they are simply ignored):
+            # move the index two bytes forward to skip them
+            self.index += 2
+            # NOTE(review): presumably cword is bytes on Python 3 - confirm; both forms are accepted
+            if self.current_destination.cword in (b'objdata', 'objdata'):
+                # Here's the tricky part: there is a bug in the MS Word RTF parser at least
+                # until Word 2016, that removes the last hex digit before the \'hh control
+                # symbol, ONLY IF the number of hex digits read so far is odd.
+                # So to emulate that bug, we have to clean the data read so far by keeping
+                # only the hex digits:
+                # Filter out any non-hex character:
+                hexdata = re.sub(b'[^a-fA-F0-9]', b'', self.current_destination.data)
+                if len(hexdata) & 1:
+                    # If the number of hex digits is odd, remove the last one:
+                    hexdata = hexdata[:-1]
+                self.current_destination.data = hexdata
+
 
 
 #=== FUNCTIONS ===============================================================
diff --git a/setup.py b/setup.py
index d812928..c8e67f9 100644
--- a/setup.py
+++ b/setup.py
@@ -43,7 +43,7 @@ import os, fnmatch
 #--- METADATA -----------------------------------------------------------------
 
 name = "oletools"
-version = '0.53dev7'
+version = '0.53dev8'
 desc = "Python tools to analyze security characteristics of MS Office and OLE files (also called Structured Storage, Compound File Binary Format or Compound Document File Format), for Malware Analysis and Incident Response #DFIR"
 long_desc = open('oletools/README.rst').read()
 author = "Philippe Lagadec"
-- 
libgit2 0.21.4