Commit 7909381f67d8b4d5e43e26ce4bd5fc53678b24ee

Authored by decalage2
1 parent d0d4c87f

merged PR #124

oletools/oleform.py 0 → 100644
  1 +#!/usr/bin/env python
  2 +
  3 +import struct
  4 +
class OleFormParsingError(Exception):
    """Error raised when a form stream cannot be parsed by this module."""
  7 +
class Mask(object):
    """Decode an integer bit field into named one-bit flags.

    Subclasses must provide two class attributes:

    * ``_size``: how many low-order bits of the value are decoded;
    * ``_names``: a list of flag names where ``_names[i]`` labels bit ``i``
      (bit 0 is the least significant bit).

    Each flag is exposed as the integer 0 or 1, both as an attribute
    (``mask.fName``) and by key (``mask['fName']``).
    """

    def __init__(self, val):
        """Split the integer *val* into its ``_size`` lowest bits."""
        self._val = [(val & (1 << i)) >> i for i in range(self._size)]

    def __str__(self):
        """Return the names of all set flags, separated by ``', '``."""
        return ', '.join(self._names[i] for i in range(self._size) if self._val[i])

    def __getattr__(self, name):
        """Return the flag called *name* (0 or 1).

        Python only calls this when regular attribute lookup has failed.

        Raises:
            AttributeError: if *name* is not a flag name.
        """
        # Names starting with an underscore are internal state, never flags.
        # Rejecting them first also prevents unbounded recursion when ``_val``
        # itself is missing (e.g. on an instance created without __init__).
        if name.startswith('_') or name not in self._names:
            raise AttributeError(
                '{0!r} object has no attribute {1!r}'.format(type(self).__name__, name))
        return self._val[self._names.index(name)]

    def __len__(self):
        """Return the number of decoded bits (``_size``)."""
        return self._size

    def __getitem__(self, key):
        """Return the flag called *key* (0 or 1).

        Raises:
            ValueError: if *key* is not a flag name (from ``list.index``).
        """
        return self._val[self._names.index(key)]
  23 +
class FormPropMask(Mask):
    """FormPropMask: [MS-OFORMS] 2.2.10.2"""
    _size = 28
    # One name per bit, least significant bit first. The two-bit Unused2 field
    # is split into Unused2_0 / Unused2_1 so that every bit has its own entry.
    _names = [
        'Unused1',
        'fBackColor',
        'fForeColor',
        'fNextAvailableID',
        'Unused2_0',
        'Unused2_1',
        'fBooleanProperties',
        # Bit 7 is fBorderStyle; it used to be listed as a second
        # 'fBooleanProperties', which mislabelled the bit in str() output.
        'fBorderStyle',
        'fMousePointer',
        'fScrollBars',
        'fDisplayedSize',
        'fLogicalSize',
        'fScrollPosition',
        'fGroupCnt',
        'Reserved',
        'fMouseIcon',
        'fCycle',
        'fSpecialEffect',
        'fBorderColor',
        'fCaption',
        'fFont',
        'fPicture',
        'fZoom',
        'fPictureAlignment',
        'fPictureTiling',
        'fPictureSizeMode',
        'fShapeCookie',
        'fDrawBuffer',
    ]
  33 +
class SitePropMask(Mask):
    """SitePropMask: [MS-OFORMS] 2.2.10.12.2"""
    _size = 15
    # One name per bit, least significant bit first.
    _names = [
        'fName',              # bit 0
        'fTag',               # bit 1
        'fID',                # bit 2
        'fHelpContextID',     # bit 3
        'fBitFlags',          # bit 4
        'fObjectStreamSize',  # bit 5
        'fTabIndex',          # bit 6
        'fClsidCacheIndex',   # bit 7
        'fPosition',          # bit 8
        'fGroupID',           # bit 9
        'Unused1',            # bit 10
        'fControlTipText',    # bit 11
        'fRuntimeLicKey',     # bit 12
        'fControlSource',     # bit 13
        'fRowSource',         # bit 14
    ]
  40 +
class MorphDataPropMask(Mask):
    """MorphDataPropMask: [MS-OFORMS] 2.2.5.2"""
    _size = 33
    # One name per bit, least significant bit first.
    _names = [
        'fVariousPropertyBits',  # bit 0
        'fBackColor',            # bit 1
        'fForeColor',            # bit 2
        'fMaxLength',            # bit 3
        'fBorderStyle',          # bit 4
        'fScrollBars',           # bit 5
        'fDisplayStyle',         # bit 6
        'fMousePointer',         # bit 7
        'fSize',                 # bit 8
        'fPasswordChar',         # bit 9
        'fListWidth',            # bit 10
        'fBoundColumn',          # bit 11
        'fTextColumn',           # bit 12
        'fColumnCount',          # bit 13
        'fListRows',             # bit 14
        'fcColumnInfo',          # bit 15
        'fMatchEntry',           # bit 16
        'fListStyle',            # bit 17
        'fShowDropButtonWhen',   # bit 18
        'UnusedBits1',           # bit 19
        'fDropButtonStyle',      # bit 20
        'fMultiSelect',          # bit 21
        'fValue',                # bit 22
        'fCaption',              # bit 23
        'fPicturePosition',      # bit 24
        'fBorderColor',          # bit 25
        'fSpecialEffect',        # bit 26
        'fMouseIcon',            # bit 27
        'fPicture',              # bit 28
        'fAccelerator',          # bit 29
        'UnusedBits2',           # bit 30
        'Reserved',              # bit 31
        'fGroupName',            # bit 32
    ]
  51 +
class ExtendedStream(object):
    """Sequential reader over a stream that keeps track of its own offset.

    On top of plain reads it offers ``struct``-based unpacking, value
    checks that raise OleFormParsingError, and two kinds of ``with``
    regions that skip bytes when the region ends:

    * ``with stream.will_jump_to(n):`` -- on exit, read whatever is left of
      the *n* bytes counted from the start of the region;
    * ``with stream.will_pad(k):`` -- on exit, read up to the next multiple
      of *k* bytes counted from the start of the region.

    Regions may be nested.
    """

    def __init__(self, stream, path):
        """Wrap *stream*; *path* is only used to prefix error messages."""
        self._pos = 0      # total number of bytes requested through read()
        self._jumps = []   # stack of (start offset, is_jump, size), one per open region
        self._stream = stream
        self._path = path

    @classmethod
    def open(cls, ole_file, path):
        """Open *path* with ``ole_file.openstream`` and wrap the result."""
        stream = ole_file.openstream(path)
        return cls(stream, path)

    def read(self, size):
        """Read *size* bytes from the wrapped stream and advance the offset.

        The offset is advanced by *size* regardless of how many bytes the
        underlying stream actually returns.
        """
        self._pos += size
        return self._stream.read(size)

    def will_jump_to(self, size):
        """Prepare a ``with`` region spanning exactly *size* bytes."""
        self._next_jump = (True, size)
        return self

    def will_pad(self, pad=4):
        """Prepare a ``with`` region whose length is rounded up to *pad* bytes."""
        self._next_jump = (False, pad)
        return self

    def __enter__(self):
        """Open the region prepared by will_jump_to() or will_pad()."""
        (jump_type, size) = self._next_jump
        self._jumps.append((self._pos, jump_type, size))

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the innermost region, skipping bytes unless the body raised."""
        # Always pop, so the region stack stays balanced even when the body
        # raised; previously a failing region left a stale entry behind.
        (start, jump_type, size) = self._jumps.pop()
        if exc_type is not None:
            # Returning None lets the exception propagate; nothing is skipped.
            return
        consumed = self._pos - start
        if jump_type:
            # NOTE(review): if the body consumed more than `size` bytes this
            # argument is negative, and what read() does then depends on the
            # wrapped stream -- confirm whether that should be an error.
            self.read(size - consumed)
        else:
            align = consumed % size
            if align:
                self.read(size - align)

    def unpacks(self, format, size):
        """Read *size* bytes and return them unpacked with *format* as a tuple."""
        return struct.unpack(format, self.read(size))

    def unpack(self, format, size):
        """Like unpacks(), but return only the first unpacked value."""
        return self.unpacks(format, size)[0]

    def raise_error(self, reason, back=0):
        """Raise OleFormParsingError with a ``path:offset: reason`` message.

        *back* is subtracted from the current offset, so that the message can
        point at the start of the value that was just read.
        """
        # Fixed: the attribute is `_path` (not `path`), and the format string
        # needs `reason` as its third argument.
        raise OleFormParsingError(
            '{0}:{1}: {2}'.format(self._path, self._pos - back, reason))

    def check_values(self, name, format, size, expected):
        """Unpack a tuple and raise OleFormParsingError unless it equals *expected*."""
        value = self.unpacks(format, size)
        if value != expected:
            self.raise_error('Invalid {0}: expected {1} got {2}'.format(name, str(expected), str(value)))

    def check_value(self, name, format, size, expected):
        """Single-value variant of check_values()."""
        self.check_values(name, format, size, (expected,))
  106 +
  107 +
def consume_TextProps(stream):
    """Skip over one TextProps structure ([MS-OFORMS] 2.3.1).

    Checks that the two version bytes are (0, 2), then reads a 16-bit
    little-endian byte count and discards that many bytes.
    """
    stream.check_values('TextProps (versions)', '<BB', 2, (0, 2))
    # The 16-bit count gives the size of the remainder of the structure.
    stream.read(stream.unpack('<H', 2))
  113 +
def consume_GuidAndFont(stream):
    """Skip over one GuidAndFont structure ([MS-OFORMS] 2.4.7).

    Reads a 16-byte GUID and then consumes the font data it announces:
    either a StdFont or a TextProps structure.

    Raises:
        OleFormParsingError: (through the stream) if the GUID is neither of
            the two supported values, or if the font data is malformed.
    """
    # The GUID is compared as four integers: the first three fields are
    # little-endian, the trailing 8 bytes are taken as one big-endian value.
    # Plain integer literals are used (no Python-2-only `L` suffix) so the
    # module also compiles on Python 3.
    guid = stream.unpacks('<LHH', 8) + stream.unpacks('>Q', 8)
    if guid == (199447043, 36753, 4558, 11376937813817407569):
        # UUID == {0BE35203-8F91-11CE-9DE300AA004BB851}
        # StdFont: [MS-OFORMS] 2.4.12
        stream.check_value('StdFont (version)', '<B', 1, 1)
        # Skip sCharset, bFlags, sWeight, ulHeight
        stream.read(9)
        bFaceLen = stream.unpack('<B', 1)
        stream.read(bFaceLen)
    elif guid == (2948729120, 55886, 4558, 13349514450607572916):
        # UUID == {AFC20920-DA4E-11CE-B94300AA006887B4}
        # Fixed: this branch used to reference an undefined name (`UUIDs`)
        # and therefore raised NameError for every non-StdFont GUID.
        consume_TextProps(stream)
    else:
        # back=16 makes the reported offset point at the start of the GUID.
        stream.raise_error('Invalid GuidAndFont (UUID)', 16)
  130 +
def consume_GuidAndPicture(stream):
    """Skip over one GuidAndPicture structure ([MS-OFORMS] 2.4.8).

    Checks the 16-byte GUID and the StdPicture preamble, then reads a
    32-bit little-endian size and discards that many bytes.

    Raises:
        OleFormParsingError: (through the stream) if the GUID or the
            preamble does not have the expected value.
    """
    # UUID == {0BE35204-8F91-11CE-9DE3-00AA004BB851}
    # The GUID is checked in two parts: three little-endian fields, then the
    # trailing 8 bytes as one big-endian integer. The integer literal carries
    # no Python-2-only `L` suffix so the module also compiles on Python 3.
    stream.check_values('GuidAndPicture (UUID part 1)', '<LHH', 8, (199447044, 36753, 4558))
    # Fixed label: this check used to report itself as "part 1" as well.
    stream.check_value('GuidAndPicture (UUID part 2)', '>Q', 8, 11376937813817407569)
    # StdPicture: [MS-OFORMS] 2.4.13
    stream.check_value('StdPicture (Preamble)', '<L', 4, 0x0000746C)
    size = stream.unpack('<L', 4)
    stream.read(size)
  140 +
def consume_CountOfBytesWithCompressionFlag(stream):
    """Read a 32-bit count whose top bit is a compression flag.

    CountOfBytesWithCompressionFlag or CountOfCharsWithCompressionFlag:
    [MS-OFORMS] 2.4.14.2 or 2.4.14.3

    Returns:
        The count with the flag bit (0x80000000) cleared.

    Raises:
        OleFormParsingError: (through the stream) if the count is non-zero
            and the flag bit is not set.
    """
    count = stream.unpack('<L', 4)
    if not count & 0x80000000 and count != 0:
        # Fixed typo (`aise_error`), which raised AttributeError instead of
        # the intended parsing error. back=4 points at the start of the count.
        stream.raise_error('Uncompress string length', 4)
    return count & 0x7FFFFFFF
  147 +
def consume_SiteClassInfo(stream):
    """Skip over one SiteClassInfo structure ([MS-OFORMS] 2.2.10.10.1).

    Checks that the 16-bit version is 0, then reads a 16-bit little-endian
    byte count and discards that many bytes.
    """
    stream.check_value('SiteClassInfo (version)', '<H', 2, 0)
    # The 16-bit count gives the size of the class table that follows.
    stream.read(stream.unpack('<H', 2))
  153 +
def consume_FormObjectDepthTypeCount(stream):
    """Read one FormObjectDepthTypeCount entry ([MS-OFORMS] 2.2.10.7).

    Returns:
        The number of sites the entry stands for: the low 7 bits of the
        second byte when its top bit is set, otherwise 1.

    Raises:
        OleFormParsingError: (through the stream) if the site type is not 1.
    """
    _depth, type_or_count = stream.unpacks('<BB', 2)
    if not type_or_count & 0x80:
        # Top bit clear: the byte is the site type itself, for a single site.
        if type_or_count != 1:
            stream.raise_error('Invalid FormObjectDepthTypeCount (SITE_TYPE): expected 1 got {0}'.format(str(type_or_count)))
        return 1
    # Top bit set: the byte carries a count and the site type follows.
    stream.check_value('FormObjectDepthTypeCount (SITE_TYPE)', '<B', 1, 1)
    return type_or_count & 0x7F
  163 +
def consume_OleSiteConcreteControl(stream):
    """Parse one OleSiteConcreteControl structure ([MS-OFORMS] 2.2.10.12.1).

    Only the name, tag, ID, tab index and CLSID cache index are kept; every
    other property that is present is read and discarded.

    Returns:
        A dict with the keys 'name', 'tag', 'id', 'tabindex' and
        'ClsidCacheIndex'. Numeric entries are 0 when the property is absent.
    """
    stream.check_value('OleSiteConcreteControl (version)', '<H', 2, 0)
    site_size = stream.unpack('<H', 2)
    with stream.will_jump_to(site_size):
        flags = SitePropMask(stream.unpack('<L', 4))

        # SiteDataBlock: [MS-OFORMS] 2.2.10.12.3
        name_size = consume_CountOfBytesWithCompressionFlag(stream) if flags.fName else 0
        tag_size = consume_CountOfBytesWithCompressionFlag(stream) if flags.fTag else 0
        site_id = stream.unpack('<L', 4) if flags.fID else 0
        for flag_name in ('fHelpContextID', 'fBitFlags', 'fObjectStreamSize'):
            if flags[flag_name]:
                stream.read(4)
        # Up to three 16-bit fields, padded together to a 4-byte boundary.
        with stream.will_pad():
            tab_index = stream.unpack('<H', 2) if flags.fTabIndex else 0
            clsid_cache_index = stream.unpack('<H', 2) if flags.fClsidCacheIndex else 0
            if flags.fGroupID:
                stream.read(2)
        # For the next 4 entries, the documentation adds padding, but it should already be aligned??
        for flag_name in ('fControlTipText', 'fRuntimeLicKey', 'fControlSource', 'fRowSource'):
            if flags[flag_name]:
                stream.read(4)

        # SiteExtraDataBlock: [MS-OFORMS] 2.2.10.12.4
        site_name = stream.read(name_size)
        site_tag = stream.read(tag_size)
    return {
        'name': site_name,
        'tag': site_tag,
        'id': site_id,
        'tabindex': tab_index,
        'ClsidCacheIndex': clsid_cache_index,
    }
  198 +
def consume_FormControl(stream):
    """Parse a FormControl structure ([MS-OFORMS] 2.2.10.1) and yield its sites.

    This is a generator: nothing is read from *stream* until iteration
    starts. It yields one dict per site, as returned by
    consume_OleSiteConcreteControl().
    """
    stream.check_values('FormControl (versions)', '<BB', 2, (0, 4))
    form_size = stream.unpack('<H', 2)
    with stream.will_jump_to(form_size):
        flags = FormPropMask(stream.unpack('<L', 4))
        # FormDataBlock: [MS-OFORMS] 2.2.10.3
        for flag_name in ('fBackColor', 'fForeColor', 'fNextAvailableID'):
            if flags[flag_name]:
                stream.read(4)
        dont_save_class_table = 0
        if flags.fBooleanProperties:
            boolean_properties = stream.unpack('<L', 4)
            # Bit 15 is FORM_FLAG_DONTSAVECLASSTABLE.
            dont_save_class_table = (boolean_properties >> 15) & 1
        # Leaving the region skips the rest of DataBlock and ExtraDataBlock.

    # FormStreamData: [MS-OFORMS] 2.2.10.5
    if flags.fMouseIcon:
        consume_GuidAndPicture(stream)
    if flags.fFont:
        consume_GuidAndFont(stream)
    if flags.fPicture:
        consume_GuidAndPicture(stream)

    # FormSiteData: [MS-OFORMS] 2.2.10.6
    if not dont_save_class_table:
        class_info_count = stream.unpack('<H', 2)
        for _ in range(class_info_count):
            consume_SiteClassInfo(stream)
    site_count, _byte_count = stream.unpacks('<LL', 8)
    # Each depth/type entry accounts for one or more sites; the whole array
    # is padded to a 4-byte boundary.
    sites_left = site_count
    with stream.will_pad():
        while sites_left > 0:
            sites_left -= consume_FormObjectDepthTypeCount(stream)
    for _ in range(site_count):
        yield consume_OleSiteConcreteControl(stream)
  234 +
def consume_MorphDataControl(stream):
    """Parse one MorphDataControl structure ([MS-OFORMS] 2.2.5.1).

    Every property except the value is read and discarded.

    Returns:
        The raw bytes of the control's value (empty when fValue is not set).
    """
    stream.check_values('MorphDataControl (versions)', '<BB', 2, (0, 2))
    block_size = stream.unpack('<H', 2)
    with stream.will_jump_to(block_size):
        flags = MorphDataPropMask(stream.unpack('<Q', 8))

        def skip_present(flag_names, width):
            # Discard `width` bytes for each listed property that is present.
            for flag_name in flag_names:
                if flags[flag_name]:
                    stream.read(width)

        # MorphDataDataBlock: [MS-OFORMS] 2.2.5.3
        skip_present(('fVariousPropertyBits', 'fBackColor', 'fForeColor', 'fMaxLength'), 4)
        with stream.will_pad():
            skip_present(('fBorderStyle', 'fScrollBars', 'fDisplayStyle', 'fMousePointer'), 1)
        # PasswordChar, BoundColumn, TextColumn, ColumnCount, and ListRows are 2B + pad = 4B
        # ListWidth is 4B + pad = 4B
        skip_present(('fPasswordChar', 'fListWidth', 'fBoundColumn', 'fTextColumn',
                      'fColumnCount', 'fListRows'), 4)
        with stream.will_pad():
            if flags.fcColumnInfo:
                stream.read(2)
            skip_present(('fMatchEntry', 'fListStyle', 'fShowDropButtonWhen',
                          'fDropButtonStyle', 'fMultiSelect'), 1)
        value_size = consume_CountOfBytesWithCompressionFlag(stream) if flags.fValue else 0
        # Caption, PicturePosition, BorderColor, SpecialEffect, GroupName are 4B + pad = 4B
        # MouseIcon, Picture, Accelerator are 2B + pad = 4B
        skip_present(('fCaption', 'fPicturePosition', 'fBorderColor', 'fSpecialEffect',
                      'fMouseIcon', 'fPicture', 'fAccelerator', 'fGroupName'), 4)

        # MorphDataExtraDataBlock: [MS-OFORMS] 2.2.5.4
        stream.read(8)
        value = stream.read(value_size)

    # MorphDataStreamData: [MS-OFORMS] 2.2.5.5
    if flags.fMouseIcon:
        consume_GuidAndPicture(stream)
    if flags.fPicture:
        consume_GuidAndPicture(stream)
    consume_TextProps(stream)
    return value
  282 +
def extract_OleFormVariables(ole_file, stream_dir):
    """Return the variables of the form stored under *stream_dir*.

    The site list is read from the form's 'f' stream and the values from
    its 'o' stream, in the same order.

    Args:
        ole_file: object providing ``openstream(path)``.
        stream_dir: list of path components of the form storage.

    Returns:
        A list of dicts as produced by consume_OleSiteConcreteControl(),
        each extended with a 'value' entry.

    Raises:
        OleFormParsingError: if a site's ClsidCacheIndex is not 23, or if a
            stream does not have the expected layout.
    """
    form_stream = ExtendedStream.open(ole_file, '/'.join(stream_dir + ['f']))
    # Exhaust the generator before the second stream is opened.
    sites = list(consume_FormControl(form_stream))
    object_stream = ExtendedStream.open(ole_file, '/'.join(stream_dir + ['o']))
    for site in sites:
        cache_index = site['ClsidCacheIndex']
        if cache_index != 23:
            raise OleFormParsingError('Unsupported stored type: {0}'.format(str(cache_index)))
        site['value'] = consume_MorphDataControl(object_stream)
    return sites
oletools/olevba.py
@@ -196,9 +196,10 @@ from __future__ import print_function @@ -196,9 +196,10 @@ from __future__ import print_function
196 # 2017-05-31 c1fe: - PR #135 fixing issue #132 for some Mac files 196 # 2017-05-31 c1fe: - PR #135 fixing issue #132 for some Mac files
197 # 2017-06-08 PL: - fixed issue #122 Chr() with negative numbers 197 # 2017-06-08 PL: - fixed issue #122 Chr() with negative numbers
198 # 2017-06-15 PL: - deobfuscation line by line to handle large files 198 # 2017-06-15 PL: - deobfuscation line by line to handle large files
199 -# 2017-07-11 v0.51.1 PL: - raise exception instead of sys.exit (issue #180) 199 +# 2017-07-11 v0.52 PL: - raise exception instead of sys.exit (issue #180)
  200 +# 2017-11-08 VB: - PR #124 adding user form parsing (Vincent Brillault)
200 201
201 -__version__ = '0.51.1dev1' 202 +__version__ = '0.52dev3'
202 203
203 #------------------------------------------------------------------------------ 204 #------------------------------------------------------------------------------
204 # TODO: 205 # TODO:
@@ -265,6 +266,8 @@ except ImportError: @@ -265,6 +266,8 @@ except ImportError:
265 + "see http://codespeak.net/lxml " \ 266 + "see http://codespeak.net/lxml " \
266 + "or http://effbot.org/zone/element-index.htm") 267 + "or http://effbot.org/zone/element-index.htm")
267 268
  269 +from oleform import extract_OleFormVariables
  270 +
268 # IMPORTANT: it should be possible to run oletools directly as scripts 271 # IMPORTANT: it should be possible to run oletools directly as scripts
269 # in any directory without installing them with pip or setup.py. 272 # in any directory without installing them with pip or setup.py.
270 # In that case, relative imports are NOT usable. 273 # In that case, relative imports are NOT usable.
@@ -1465,7 +1468,7 @@ def _extract_vba(ole, vba_root, project_path, dir_path, relaxed=False): @@ -1465,7 +1468,7 @@ def _extract_vba(ole, vba_root, project_path, dir_path, relaxed=False):
1465 # So let's ignore it, otherwise it crashes on some files (issue #132) 1468 # So let's ignore it, otherwise it crashes on some files (issue #132)
1466 # PR #135 by @c1fe: 1469 # PR #135 by @c1fe:
1467 # contrary to the specification I think that the unicode name 1470 # contrary to the specification I think that the unicode name
1468 - # is optional. if reference_reserved is not 0x003E I think it 1471 + # is optional. if reference_reserved is not 0x003E I think it
1469 # is actually the start of another REFERENCE record 1472 # is actually the start of another REFERENCE record
1470 # at least when projectsyskind_syskind == 0x02 (Macintosh) 1473 # at least when projectsyskind_syskind == 0x02 (Macintosh)
1471 if reference_reserved == 0x003E: 1474 if reference_reserved == 0x003E:
@@ -2986,6 +2989,24 @@ class VBA_Parser(object): @@ -2986,6 +2989,24 @@ class VBA_Parser(object):
2986 log.debug('Printable string found in form: %r' % m.group()) 2989 log.debug('Printable string found in form: %r' % m.group())
2987 yield (self.filename, '/'.join(o_stream), m.group()) 2990 yield (self.filename, '/'.join(o_stream), m.group())
2988 2991
  2992 + def extract_form_strings_extended(self):
  2993 + if self.ole_file is None:
  2994 + # This may be either an OpenXML/PPT or a text file:
  2995 + if self.type == TYPE_TEXT:
  2996 + # This is a text file, return no results:
  2997 + return
  2998 + else:
  2999 + # OpenXML/PPT: recursively yield results from each OLE subfile:
  3000 + for ole_subfile in self.ole_subfiles:
  3001 + for results in ole_subfile.extract_form_strings_extended():
  3002 + yield results
  3003 + else:
  3004 + # This is an OLE file:
  3005 + self.find_vba_forms()
  3006 + ole = self.ole_file
  3007 + for form_storage in self.vba_forms:
  3008 + for variable in extract_OleFormVariables(ole, form_storage):
  3009 + yield (self.filename, '/'.join(form_storage), variable)
2989 3010
2990 def close(self): 3011 def close(self):
2991 """ 3012 """
@@ -3115,6 +3136,11 @@ class VBA_Parser_CLI(VBA_Parser): @@ -3115,6 +3136,11 @@ class VBA_Parser_CLI(VBA_Parser):
3115 print('VBA FORM STRING IN %r - OLE stream: %r' % (subfilename, stream_path)) 3136 print('VBA FORM STRING IN %r - OLE stream: %r' % (subfilename, stream_path))
3116 print('- ' * 39) 3137 print('- ' * 39)
3117 print(form_string) 3138 print(form_string)
  3139 + for (subfilename, stream_path, form_variables) in self.extract_form_strings_extended():
  3140 + print('-' * 79)
  3141 + print('VBA FORM Variable "%s" IN %r - OLE stream: %r' % (form_variables['name'], subfilename, stream_path))
  3142 + print('- ' * 39)
  3143 + print(str(form_variables['value']))
3118 if not vba_code_only: 3144 if not vba_code_only:
3119 # analyse the code from all modules at once: 3145 # analyse the code from all modules at once:
3120 self.print_analysis(show_decoded_strings, deobfuscate) 3146 self.print_analysis(show_decoded_strings, deobfuscate)
setup.py
@@ -42,7 +42,7 @@ import os, fnmatch @@ -42,7 +42,7 @@ import os, fnmatch
42 #--- METADATA ----------------------------------------------------------------- 42 #--- METADATA -----------------------------------------------------------------
43 43
44 name = "oletools" 44 name = "oletools"
45 -version = '0.52dev2' 45 +version = '0.52dev3'
46 desc = "Python tools to analyze security characteristics of MS Office and OLE files (also called Structured Storage, Compound File Binary Format or Compound Document File Format), for Malware Analysis and Incident Response #DFIR" 46 desc = "Python tools to analyze security characteristics of MS Office and OLE files (also called Structured Storage, Compound File Binary Format or Compound Document File Format), for Malware Analysis and Incident Response #DFIR"
47 long_desc = open('oletools/README.rst').read() 47 long_desc = open('oletools/README.rst').read()
48 author = "Philippe Lagadec" 48 author = "Philippe Lagadec"