Commit dbd2a780c8e9d47f7bad0093def80fb365755358

Authored by Vincent Brillault
1 parent 5525fef1

WIP: Extract variable name and value from UserForm

This is a very early draft of a new feature: parsing UserForms.
No guarantee attached (is likely to fail on paths I didn't test...)
oletools/oleform.py 0 → 100644
  1 +#!/usr/bin/env python
  2 +
  3 +import struct
  4 +
class OleFormParsingError(Exception):
    """Raised when a UserForm stream does not match the expected layout."""
  7 +
class Mask(object):
    """Expose the 32 bits of an integer as named flag attributes.

    Subclasses must define ``_names``, a list of 32 strings in which
    ``_names[i]`` labels bit ``i`` (bit 0 being the least significant one).
    Reading ``mask.<name>`` returns 0 or 1 for the corresponding bit; when a
    name appears more than once in ``_names`` the first occurrence wins.
    """

    def __init__(self, val):
        # One entry (0 or 1) per bit, least significant bit first.
        self._val = [(val >> bit) & 1 for bit in range(32)]

    def __str__(self):
        """Return the comma-separated names of all bits that are set."""
        return ', '.join(self._names[i] for i, bit in enumerate(self._val) if bit)

    def __getattr__(self, name):
        """Return the bit named ``name``.

        Raises:
            AttributeError: if ``name`` is not one of the flag names, so that
                ``hasattr`` and ``getattr(obj, name, default)`` behave normally.
        """
        # __getattr__ only runs when regular lookup failed.  If the storage
        # attributes themselves are missing (base class without ``_names``, or
        # an instance created without __init__), looking them up below would
        # re-enter this method forever.
        if name in ('_names', '_val'):
            raise AttributeError(name)
        try:
            index = self._names.index(name)
        except ValueError:
            raise AttributeError('{0!r} object has no flag {1!r}'.format(type(self).__name__, name))
        return self._val[index]
  17 +
class PropMask(Mask):
    """Bit names of the form property mask (FormPropMask: [MS-OFORMS] 2.2.10.2).

    Entry ``i`` of ``_names`` labels bit ``i`` of the 32-bit mask read at the
    start of a FormControl.
    """
    # Bit 7 is fBorderStyle; it used to be listed as a second
    # 'fBooleanProperties', which mislabelled that bit when printing the mask.
    _names = ['Unused1', 'fBackColor', 'fForeColor', 'fNextAvailableID', 'Unused2_0', 'Unused2_1',
              'fBooleanProperties', 'fBorderStyle', 'fMousePointer', 'fScrollBars',
              'fDisplayedSize', 'fLogicalSize', 'fScrollPosition', 'fGroupCnt', 'Reserved',
              'fMouseIcon', 'fCycle', 'fSpecialEffect', 'fBorderColor', 'fCaption', 'fFont',
              'fPicture', 'fZoom', 'fPictureAlignment', 'fPictureTiling', 'fPictureSizeMode',
              'fShapeCookie', 'fDrawBuffer', 'Unused3_0', 'Unused3_1', 'Unused3_2', 'Unused3_3']
  25 +
class SitePropMask(Mask):
    """Bit names of the site property mask (SitePropMask: [MS-OFORMS] 2.2.10.12.2).

    Entry ``i`` of ``_names`` labels bit ``i`` of the 32-bit mask: fifteen
    named bits followed by seventeen unused ones.
    """
    _names = [
        'fName',
        'fTag',
        'fID',
        'fHelpContextID',
        'fBitFlags',
        'fObjectStreamSize',
        'fTabIndex',
        'fClsidCacheIndex',
        'fPosition',
        'fGroupID',
        'Unused1',
        'fControlTipText',
        'fRuntimeLicKey',
        'fControlSource',
        'fRowSource',
    ] + list(map('Unused2_{0}'.format, range(17)))
  33 +
class OleUserFormParser(object):
    """Forward-only reader for the 'f' and 'o' streams of a UserForm storage.

    The parser wraps a file-like object, counts the bytes it has requested
    and accumulates its results in ``content``:

    * ``consume_FormControl`` appends one dict per embedded site, with the
      keys 'name', 'tag', 'id', 'tabindex' and 'ClsidCacheIndex';
    * ``consume_stream_o`` appends one value per recognised record.

    Structure names and section numbers in the comments refer to [MS-OFORMS].
    """

    # GUIDs are compared in the shape they are read: the three little-endian
    # fields of '<LHH' followed by the big-endian '>Q' tail.
    # {0BE35203-8F91-11CE-9DE3-00AA004BB851}
    _GUID_STDFONT = (0x0BE35203, 0x8F91, 0x11CE, 0x9DE300AA004BB851)
    # {AFC20920-DA4E-11CE-B943-00AA006887B4}
    _GUID_TEXTPROPS = (0xAFC20920, 0xDA4E, 0x11CE, 0xB94300AA006887B4)
    # {0BE35204-8F91-11CE-9DE3-00AA004BB851}
    _GUID_STDPICTURE = (0x0BE35204, 0x8F91, 0x11CE, 0x9DE300AA004BB851)

    def __init__(self, stream):
        # Parsed results, filled by the consume_* entry points.
        self.content = []
        self._stream = stream
        # Number of bytes requested from the stream so far.
        self._pos = 0
        # Stack of positions saved by freeze().
        self._frozen_pos = []

    def read(self, size):
        """Read ``size`` bytes from the stream and advance the position."""
        # The counter advances by the requested size even on a short read.
        self._pos += size
        return self._stream.read(size)

    def freeze(self):
        """Push the current position so a later unfreeze*() can refer to it."""
        self._frozen_pos.append(self._pos)

    def unfreeze(self, size):
        """Pop the last frozen position and skip to ``size`` bytes past it."""
        # NOTE(review): if more than ``size`` bytes were already consumed the
        # computed length is negative, which file-like objects usually treat
        # as "read everything" - confirm whether this should be an error.
        self.read(self._frozen_pos.pop() - self._pos + size)

    def unfreeze_pad(self):
        """Pop the last frozen position and skip padding to a 4-byte boundary.

        The boundary is relative to the popped position, not to the stream.
        """
        align_pos = (self._pos - self._frozen_pos.pop()) % 4
        if align_pos:
            self.read(4 - align_pos)

    def unpacks(self, format, size):
        """Read ``size`` bytes and return them unpacked as a tuple.

        Raises:
            struct.error: if fewer than ``size`` bytes were available.
        """
        return struct.unpack(format, self.read(size))

    def unpack(self, format, size):
        """Read ``size`` bytes and return the first unpacked field."""
        return self.unpacks(format, size)[0]

    def check_values(self, name, format, size, expected):
        """Read several fields and require them to equal the tuple ``expected``.

        Raises:
            OleFormParsingError: if the values read differ from ``expected``.
        """
        value = self.unpacks(format, size)
        if value != expected:
            raise OleFormParsingError('Invalid {0} at {1}: expected {2} got {3}'.format(name, self._pos - size, str(expected), str(value)))

    def check_value(self, name, format, size, expected):
        """Read a single field and require it to equal ``expected``."""
        self.check_values(name, format, size, (expected,))

    def consume_GuidAndFont(self):
        """Skip a GuidAndFont structure (StdFont or TextProps payload).

        Raises:
            OleFormParsingError: if the GUID is neither of the two known ones
                or the payload header is not the expected one.
        """
        # GuidAndFont: [MS-OFORMS] 2.4.7
        guid = self.unpacks('<LHH', 8) + self.unpacks('>Q', 8)
        if guid == self._GUID_STDFONT:
            # StdFont: [MS-OFORMS] 2.4.12
            self.check_value('StdFont (version)', '<B', 1, 1)
            # Skip sCharset, bFlags, sWeight, ulHeight
            self.read(9)
            bFaceLen = self.unpack('<B', 1)
            self.read(bFaceLen)
        elif guid == self._GUID_TEXTPROPS:
            # TextProps: [MS-OFORMS] 2.3.1
            # Two fields are read, so this must be the tuple comparison.
            self.check_values('TextProps (versions)', '<BB', 2, (0, 2))
            cbTextProps = self.unpack('<H', 2)
            self.read(cbTextProps)
        else:
            raise OleFormParsingError('Invalid GuidAndFont at {0}: UUID'.format(self._pos - 16))

    def consume_GuidAndPicture(self):
        """Skip a GuidAndPicture structure.

        Raises:
            OleFormParsingError: if the GUID or the StdPicture preamble is not
                the expected one.
        """
        # GuidAndPicture: [MS-OFORMS] 2.4.8
        self.check_values('GuidAndPicture (UUID part 1)', '<LHH', 8, self._GUID_STDPICTURE[:3])
        self.check_value('GuidAndPicture (UUID part 2)', '>Q', 8, self._GUID_STDPICTURE[3])
        # StdPicture: [MS-OFORMS] 2.4.13
        self.check_value('StdPicture (Preamble)', '<L', 4, 0x0000746C)
        size = self.unpack('<L', 4)
        self.read(size)

    def consume_CountOfBytesWithCompressionFlag(self):
        """Read a 32-bit string length and return it without its top bit.

        Raises:
            OleFormParsingError: if the length is non-zero and the top
                (compression) bit is not set.
        """
        # CountOfBytesWithCompressionFlag or CountOfCharsWithCompressionFlag: [MS-OFORMS] 2.4.14.2 or 2.4.14.3
        count = self.unpack('<L', 4)
        if not count & 0x80000000 and count != 0:
            raise OleFormParsingError('Uncompress string length at {0}'.format(self._pos - 4))
        return count & 0x7FFFFFFF

    def consume_SiteClassInfo(self):
        """Skip one SiteClassInfo structure.

        Raises:
            OleFormParsingError: if the version field is not 0.
        """
        # SiteClassInfo: [MS-OFORMS] 2.2.10.10.1
        self.check_value('SiteClassInfo (version)', '<H', 2, 0)
        cbClassTable = self.unpack('<H', 2)
        self.read(cbClassTable)

    def consume_FormObjectDepthTypeCount(self):
        """Read one FormObjectDepthTypeCount and return how many sites it covers.

        Raises:
            OleFormParsingError: if the site type is not 1.
        """
        # FormObjectDepthTypeCount: [MS-OFORMS] 2.2.10.7
        (depth, mixed) = self.unpacks('<BB', 2)
        if mixed & 0x80:
            # Top bit set: the low 7 bits are a count and the type follows.
            self.check_value('FormObjectDepthTypeCount (SITE_TYPE)', '<B', 1, 1)
            return mixed ^ 0x80
        # Top bit clear: the byte itself is the type and the count is 1.
        if mixed != 1:
            raise OleFormParsingError('Invalid FormObjectDepthTypeCount (SITE_TYPE) at {0}: expected 1 got {1}'.format(self._pos - 2, str(mixed)))
        return 1

    def consume_OleSiteConcreteControl(self):
        """Parse one OleSiteConcreteControl and append its summary to ``content``.

        Raises:
            OleFormParsingError: if the version field is not 0 or a string
                length is not flagged as compressed.
        """
        # OleSiteConcreteControl: [MS-OFORMS] 2.2.10.12.1
        self.check_value('OleSiteConcreteControl (version)', '<H', 2, 0)
        cbSite = self.unpack('<H', 2)
        self.freeze()
        sitepropmask = SitePropMask(self.unpack('<L', 4))
        # SiteDataBlock: [MS-OFORMS] 2.2.10.12.3
        name_len = tag_len = site_id = 0
        if sitepropmask.fName:
            name_len = self.consume_CountOfBytesWithCompressionFlag()
        if sitepropmask.fTag:
            tag_len = self.consume_CountOfBytesWithCompressionFlag()
        if sitepropmask.fID:
            site_id = self.unpack('<L', 4)
        if sitepropmask.fHelpContextID:
            self.read(4)
        if sitepropmask.fBitFlags:
            self.read(4)
        if sitepropmask.fObjectStreamSize:
            self.read(4)
        tabindex = ClsidCacheIndex = 0
        # The following 2-byte fields are padded as a group to 4 bytes.
        self.freeze()
        if sitepropmask.fTabIndex:
            tabindex = self.unpack('<H', 2)
        if sitepropmask.fClsidCacheIndex:
            ClsidCacheIndex = self.unpack('<H', 2)
        if sitepropmask.fGroupID:
            self.read(2)
        self.unfreeze_pad()
        # For the next 4 entries, the documentation adds padding, but it should already be aligned??
        if sitepropmask.fControlTipText:
            self.read(4)
        if sitepropmask.fRuntimeLicKey:
            self.read(4)
        if sitepropmask.fControlSource:
            self.read(4)
        if sitepropmask.fRowSource:
            self.read(4)
        # SiteExtraDataBlock: [MS-OFORMS] 2.2.10.12.4
        # NOTE(review): name and tag are read back-to-back; check against the
        # specification whether each string is padded to 4 bytes.
        name = self.read(name_len)
        tag = self.read(tag_len)
        self.content.append({'name': name, 'tag': tag, 'id': site_id,
                             'tabindex': tabindex,
                             'ClsidCacheIndex': ClsidCacheIndex})
        # Skip whatever is left of the cbSite bytes that follow the header.
        self.unfreeze(cbSite)

    def consume_FormControl(self):
        """Parse a FormControl ('f' stream) and collect its sites in ``content``.

        Raises:
            OleFormParsingError: if any checked field has an unexpected value.
        """
        # FormControl: [MS-OFORMS] 2.2.10.1
        self.check_values('FormControl (versions)', '<BB', 2, (0, 4))
        cbform = self.unpack('<H', 2)
        self.freeze()
        propmask = PropMask(self.unpack('<L', 4))
        # FormDataBlock: [MS-OFORMS] 2.2.10.3
        if propmask.fBackColor:
            self.read(4)
        if propmask.fForeColor:
            self.read(4)
        if propmask.fNextAvailableID:
            self.read(4)
        if propmask.fBooleanProperties:
            BooleanProperties = self.unpack('<L', 4)
            FORM_FLAG_DONTSAVECLASSTABLE = (BooleanProperties & (1 << 15)) >> 15
        else:
            FORM_FLAG_DONTSAVECLASSTABLE = 0
        # Skip the rest of DataBlock and ExtraDataBlock
        self.unfreeze(cbform)
        # FormStreamData: [MS-OFORMS] 2.2.10.5
        if propmask.fMouseIcon:
            self.consume_GuidAndPicture()
        if propmask.fFont:
            self.consume_GuidAndFont()
        if propmask.fPicture:
            self.consume_GuidAndPicture()
        # FormSiteData: [MS-OFORMS] 2.2.10.6
        if not FORM_FLAG_DONTSAVECLASSTABLE:
            CountOfSiteClassInfo = self.unpack('<H', 2)
            for _ in range(CountOfSiteClassInfo):
                self.consume_SiteClassInfo()
        (CountOfSites, CountOfBytes) = self.unpacks('<LL', 8)
        remaining_SiteDepthsAndTypes = CountOfSites
        # The depth/type array is padded as a whole to 4 bytes.
        self.freeze()
        while remaining_SiteDepthsAndTypes > 0:
            remaining_SiteDepthsAndTypes -= self.consume_FormObjectDepthTypeCount()
        self.unfreeze_pad()
        for _ in range(CountOfSites):
            self.consume_OleSiteConcreteControl()

    def consume_stream_o(self):
        """Scan the 'o' stream record by record and collect values in ``content``.

        Each record starts with a 16-bit code and a 16-bit length.  For code
        0x200, field type 0x80400101 contributes the embedded string and
        field type 0x80000101 contributes an empty string; every other record
        is skipped.  Scanning stops at the first incomplete header.
        """
        # Adapted from plugin_stream_o.py from Didier Stevens's oledump.py
        while True:
            try:
                (code, length) = self.unpacks('<HH', 4)
            except struct.error:
                # Fewer than 4 bytes left: end of stream.
                break
            self.freeze()
            if code == 0x200:
                fieldtype = self.unpack('<I', 4)
                if fieldtype == 0x80400101:
                    self.read(8)
                    # The top bit is masked off without being checked here.
                    lengthString = self.unpack('<I', 4) & 0x7FFFFFFF
                    self.read(8)
                    self.content.append(self.read(lengthString))
                elif fieldtype == 0x80000101:
                    self.content.append('')
            # Move to the end of the record, whatever was consumed above.
            self.unfreeze(length)
  229 +
def OleFormVariables(ole_file, stream_dir):
    """Pair every control declared in a UserForm with its stored value.

    ``stream_dir`` is the storage path as a list of components; the streams
    named 'f' and 'o' below it are opened through ``ole_file.openstream``.

    Returns:
        The list of dicts collected by ``consume_FormControl`` from the 'f'
        stream, each extended with a 'value' key taken, in order, from the
        values collected by ``consume_stream_o`` from the 'o' stream.

    Raises:
        OleFormParsingError: if the two streams yield different item counts.
    """
    form_parser = OleUserFormParser(ole_file.openstream('/'.join(stream_dir + ['f'])))
    form_parser.consume_FormControl()
    variables = form_parser.content

    value_parser = OleUserFormParser(ole_file.openstream('/'.join(stream_dir + ['o'])))
    value_parser.consume_stream_o()
    values = value_parser.content

    if len(variables) != len(values):
        raise OleFormParsingError('Incompatible number of variables: {0} VS {1}'.format(len(variables), len(values)))
    # Both lists have the same length here, so zip() pairs every entry.
    for variable, value in zip(variables, values):
        variable['value'] = value
    return variables
oletools/olevba.py
@@ -254,6 +254,8 @@ except ImportError: @@ -254,6 +254,8 @@ except ImportError:
254 + "see http://codespeak.net/lxml " \ 254 + "see http://codespeak.net/lxml " \
255 + "or http://effbot.org/zone/element-index.htm") 255 + "or http://effbot.org/zone/element-index.htm")
256 256
  257 +from oleform import OleFormVariables
  258 +
257 import thirdparty.olefile as olefile 259 import thirdparty.olefile as olefile
258 from thirdparty.prettytable import prettytable 260 from thirdparty.prettytable import prettytable
259 from thirdparty.xglob import xglob, PathNotFoundException 261 from thirdparty.xglob import xglob, PathNotFoundException
@@ -2906,6 +2908,24 @@ class VBA_Parser(object): @@ -2906,6 +2908,24 @@ class VBA_Parser(object):
2906 log.debug('Printable string found in form: %r' % m.group()) 2908 log.debug('Printable string found in form: %r' % m.group())
2907 yield (self.filename, '/'.join(o_stream), m.group()) 2909 yield (self.filename, '/'.join(o_stream), m.group())
2908 2910
  2911 + def extract_form_strings_extended(self):
  2912 + if self.ole_file is None:
  2913 + # This may be either an OpenXML/PPT or a text file:
  2914 + if self.type == TYPE_TEXT:
  2915 + # This is a text file, return no results:
  2916 + return
  2917 + else:
  2918 + # OpenXML/PPT: recursively yield results from each OLE subfile:
  2919 + for ole_subfile in self.ole_subfiles:
  2920 + for results in ole_subfile.extract_form_strings_extended():
  2921 + yield results
  2922 + else:
  2923 + # This is an OLE file:
  2924 + self.find_vba_forms()
  2925 + ole = self.ole_file
  2926 + for form_storage in self.vba_forms:
  2927 + for variable in OleFormVariables(ole, form_storage):
  2928 + yield (self.filename, '/'.join(form_storage), variable)
2909 2929
2910 def close(self): 2930 def close(self):
2911 """ 2931 """
@@ -3035,6 +3055,11 @@ class VBA_Parser_CLI(VBA_Parser): @@ -3035,6 +3055,11 @@ class VBA_Parser_CLI(VBA_Parser):
3035 print('VBA FORM STRING IN %r - OLE stream: %r' % (subfilename, stream_path)) 3055 print('VBA FORM STRING IN %r - OLE stream: %r' % (subfilename, stream_path))
3036 print('- ' * 39) 3056 print('- ' * 39)
3037 print(form_string) 3057 print(form_string)
  3058 + for (subfilename, stream_path, form_variables) in self.extract_form_strings_extended():
  3059 + print('-' * 79)
  3060 + print('VBA FORM Variable "%s" IN %r - OLE stream: %r' % (form_variables['name'], subfilename, stream_path))
  3061 + print('- ' * 39)
  3062 + print(str(form_variables['value']))
3038 if not vba_code_only: 3063 if not vba_code_only:
3039 # analyse the code from all modules at once: 3064 # analyse the code from all modules at once:
3040 self.print_analysis(show_decoded_strings, deobfuscate) 3065 self.print_analysis(show_decoded_strings, deobfuscate)