Commit 9201fe43e62b4496442eeeda518bd4c64cbef936

Authored by decalage2
1 parent 4901744d

rtfobj: handle the "\'" obfuscation trick - issue #281

Showing 2 changed files with 25 additions and 2 deletions
oletools/rtfobj.py
... ... @@ -82,8 +82,9 @@ http://www.decalage.info/python/oletools
82 82 # 2018-04-09 PL: - fixed issue #280: OLE Package were not detected on Python 3
83 83 # 2018-03-24 v0.53 PL: - fixed issue #292: \margSz is a destination
84 84 # 2018-04-27 PL: - extract and display the CLSID of OLE objects
  85 +# 2018-04-30 PL: - handle "\'" obfuscation trick - issue #281
85 86  
86   -__version__ = '0.53dev7'
  87 +__version__ = '0.53dev8'
87 88  
88 89 # ------------------------------------------------------------------------------
89 90 # TODO:
... ... @@ -706,6 +707,28 @@ class RtfObjParser(RtfParser):
706 707 # log.debug('- Control word "%s", param=%s, level=%d' % (cword, param, self.group_level))
707 708 pass
708 709  
def control_symbol(self, matchobject):
    r"""
    Handle an RTF control symbol, emulating MS Word's treatment of "\'hh"
    (obfuscation trick described in issue #281).

    Only the "\'" symbol is acted upon; every other control symbol is
    ignored by this handler.

    :param matchobject: regex match object for the control symbol; the
        second byte of the matched text identifies the symbol.
    :return: None. Side effects when the symbol is "\'": self.index is
        advanced by two to skip the two characters following the symbol,
        and, if the current destination is objdata, the data collected so
        far is reduced to its hex digits and truncated to an even length.
    """
    # log.debug('control symbol %r at index %Xh' % (matchobject.group(), self.index))
    # Slicing with [1:2] (rather than indexing with [1]) yields a one-byte
    # string on both Python 2 and Python 3.
    symbol = matchobject.group()[1:2]
    # The RTF data is processed as bytes (see the bytes regex below), so the
    # comparisons must use bytes literals: on Python 3 a bytes object never
    # equals a str, which would silently disable this whole handler.
    if symbol != b"'":
        return
    # The two characters following "\'" are normally hex digits, but they can
    # be any characters (within an objdata destination they are simply
    # ignored), so just move the index two bytes forward to skip them.
    self.index += 2
    destination = self.current_destination
    if destination.cword == b'objdata':
        # Here's the tricky part: there is a bug in the MS Word RTF parser at
        # least until Word 2016, that removes the last hex digit before the
        # \'hh control symbol, ONLY IF the number of hex digits read so far
        # is odd. To emulate that bug, first clean the data read so far by
        # keeping only the hex digits:
        hex_only = re.sub(b'[^a-fA-F0-9]', b'', destination.data)
        if len(hex_only) % 2 == 1:
            # If the number of hex digits is odd, remove the last one:
            hex_only = hex_only[:-1]
        destination.data = hex_only
  731 +
709 732  
710 733 #=== FUNCTIONS ===============================================================
711 734  
... ...
setup.py
... ... @@ -43,7 +43,7 @@ import os, fnmatch
43 43 #--- METADATA -----------------------------------------------------------------
44 44  
45 45 name = "oletools"
46   -version = '0.53dev7'
  46 +version = '0.53dev8'
47 47 desc = "Python tools to analyze security characteristics of MS Office and OLE files (also called Structured Storage, Compound File Binary Format or Compound Document File Format), for Malware Analysis and Incident Response #DFIR"
48 48 long_desc = open('oletools/README.rst').read()
49 49 author = "Philippe Lagadec"
... ...