Commit dbd2a780c8e9d47f7bad0093def80fb365755358

Authored by Vincent Brillault
1 parent 5525fef1

WIP: Extract variable name and value from UserForm

This is a very early draft of a new feature: parsing UserForms.
No guarantee attached (is likely to fail on paths I didn't test...)
oletools/oleform.py 0 → 100644
  1 +#!/usr/bin/env python
  2 +
  3 +import struct
  4 +
class OleFormParsingError(Exception):
    """Raised when a UserForm stream does not have the expected layout."""
  7 +
class Mask(object):
    """Expose the 32 low bits of an integer as named attributes.

    Subclasses define ``_names``: a sequence of bit names ordered from the
    least significant bit to the most significant one.  Reading
    ``mask.<name>`` returns 0 or 1 for the corresponding bit.
    """

    # Bit names, least significant bit first; overridden by subclasses.
    _names = ()

    def __init__(self, val):
        # One entry (0 or 1) per bit, least significant bit first.
        self._val = [(val >> bit) & 1 for bit in range(32)]

    def __str__(self):
        """Return the comma-separated names of the bits that are set."""
        return ', '.join(
            name for name, bit in zip(self._names, self._val) if bit)

    def __getattr__(self, name):
        """Return the bit called ``name``.

        Raises AttributeError for unknown names so that ``hasattr`` and
        ``getattr(obj, name, default)`` behave as usual.
        """
        # __getattr__ only runs when normal lookup failed.  Private names
        # (notably ``_val`` on a half-initialised instance) must not be
        # resolved through the bit table, otherwise looking up ``self._val``
        # below would recurse forever.
        if name.startswith('_'):
            raise AttributeError(name)
        try:
            index = self._names.index(name)
        except ValueError:
            raise AttributeError(name)
        return self._val[index]
  17 +
class PropMask(Mask):
    """Bit names of the FormPropMask structure ([MS-OFORMS] 2.2.10.2).

    Names are listed from the least significant bit to the most significant
    one; multi-bit unused fields are expanded into one name per bit.
    """
    # Bit 7 is fBorderStyle; it was previously a second 'fBooleanProperties'
    # entry, which made str(mask) report the wrong property name.
    _names = ['Unused1', 'fBackColor', 'fForeColor', 'fNextAvailableID', 'Unused2_0', 'Unused2_1',
              'fBooleanProperties', 'fBorderStyle', 'fMousePointer', 'fScrollBars',
              'fDisplayedSize', 'fLogicalSize', 'fScrollPosition', 'fGroupCnt', 'Reserved',
              'fMouseIcon', 'fCycle', 'fSpecialEffect', 'fBorderColor', 'fCaption', 'fFont',
              'fPicture', 'fZoom', 'fPictureAlignment', 'fPictureTiling', 'fPictureSizeMode',
              'fShapeCookie', 'fDrawBuffer', 'Unused3_0', 'Unused3_1', 'Unused3_2', 'Unused3_3']
  25 +
class SitePropMask(Mask):
    """Bit names used to decode the property mask of an OleSiteConcreteControl.

    Names are listed from the least significant bit to the most significant
    one; the trailing unused bits get one generated name each.
    """
    # 15 named bits followed by 17 unused ones (Unused2_0 .. Unused2_16).
    _names = [
        'fName',
        'fTag',
        'fID',
        'fHelpContextID',
        'fBitFlags',
        'fObjectStreamSize',
        'fTabIndex',
        'fClsidCacheIndex',
        'fPosition',
        'fGroupID',
        'Unused1',
        'fControlTipText',
        'fRuntimeLicKey',
        'fControlSource',
        'fRowSource',
    ] + list(map('Unused2_{0}'.format, range(17)))
  33 +
class OleUserFormParser(object):
    """Sequential reader for the binary streams of a VBA UserForm.

    The parser wraps a file-like ``stream`` and keeps count of the bytes it
    requested from it, so that sized structures can be skipped and padding
    can be computed.  Whatever the ``consume_*`` entry points extract is
    appended to ``self.content``.
    """

    def __init__(self, stream):
        # Extracted items (dicts for the 'f' stream, strings for the 'o' one).
        self.content = []
        self._stream = stream
        # Number of bytes requested from the stream so far.
        self._pos = 0
        # Stack of positions saved by freeze().
        self._frozen_pos = []

    def read(self, size):
        """Read ``size`` bytes from the stream and advance the position."""
        self._pos += size
        return self._stream.read(size)

    def freeze(self):
        """Remember the current position (pushed on a stack)."""
        self._frozen_pos.append(self._pos)

    def unfreeze(self, size):
        """Skip to ``size`` bytes after the most recently frozen position."""
        self.read(self._frozen_pos.pop() - self._pos + size)

    def unfreeze_pad(self):
        """Consume padding up to a 4-byte boundary.

        The boundary is measured from the most recently frozen position,
        which is popped from the stack.
        """
        align_pos = (self._pos - self._frozen_pos.pop()) % 4
        if align_pos:
            self.read(4 - align_pos)

    def unpacks(self, format, size):
        """Read ``size`` bytes and return them unpacked as a tuple."""
        return struct.unpack(format, self.read(size))

    def unpack(self, format, size):
        """Read ``size`` bytes and return the first unpacked value."""
        return self.unpacks(format, size)[0]

    def check_values(self, name, format, size, expected):
        """Unpack a tuple and raise OleFormParsingError unless it equals ``expected``."""
        value = self.unpacks(format, size)
        if value != expected:
            raise OleFormParsingError(
                'Invalid {0} at {1}: expected {2} got {3}'.format(
                    name, self._pos - size, str(expected), str(value)))

    def check_value(self, name, format, size, expected):
        """Single-value variant of check_values()."""
        self.check_values(name, format, size, (expected,))

    def consume_GuidAndFont(self):
        """Skip a GuidAndFont structure; raise OleFormParsingError on an unknown GUID."""
        # GuidAndFont: [MS-OFORMS] 2.4.7
        uuid = self.unpacks('<LHH', 8) + self.unpacks('>Q', 8)
        if uuid == (199447043, 36753, 4558, 11376937813817407569):
            # UUID == {0BE35203-8F91-11CE-9DE300AA004BB851}
            # StdFont: [MS-OFORMS] 2.4.12
            self.check_value('StdFont (version)', '<B', 1, 1)
            # Skip sCharset, bFlags, sWeight, ulHeight
            self.read(9)
            face_len = self.unpack('<B', 1)
            self.read(face_len)
        elif uuid == (2948729120, 55886, 4558, 13349514450607572916):
            # UUID == {AFC20920-DA4E-11CE-B94300AA006887B4}
            # TextProps: [MS-OFORMS] 2.3.1
            # Two values are unpacked here, so the tuple-aware check is needed.
            self.check_values('TextProps (versions)', '<BB', 2, (0, 2))
            text_props_size = self.unpack('<H', 2)
            self.read(text_props_size)
        else:
            raise OleFormParsingError(
                'Invalid GuidAndFont at {0}: UUID'.format(self._pos - 16))

    def consume_GuidAndPicture(self):
        """Skip a GuidAndPicture structure, validating its GUID and preamble."""
        # GuidAndPicture: [MS-OFORMS] 2.4.8
        # UUID == {0BE35204-8F91-11CE-9DE3-00AA004BB851}
        self.check_values('GuidAndPicture (UUID part 1)', '<LHH', 8, (199447044, 36753, 4558))
        self.check_value('GuidAndPicture (UUID part 2)', '>Q', 8, 11376937813817407569)
        # StdPicture: [MS-OFORMS] 2.4.13
        self.check_value('StdPicture (Preamble)', '<L', 4, 0x0000746C)
        size = self.unpack('<L', 4)
        self.read(size)

    def consume_CountOfBytesWithCompressionFlag(self):
        """Read a string length field and return the length without its flag bit.

        Raises OleFormParsingError when the length is non-zero and the top
        (compression) bit is not set.
        """
        # CountOfBytesWithCompressionFlag or CountOfCharsWithCompressionFlag:
        # [MS-OFORMS] 2.4.14.2 or 2.4.14.3
        count = self.unpack('<L', 4)
        if not count & 0x80000000 and count != 0:
            raise OleFormParsingError(
                'Uncompress string length at {0}'.format(self._pos - 4))
        return count & 0x7FFFFFFF

    def consume_SiteClassInfo(self):
        """Skip one SiteClassInfo structure."""
        # SiteClassInfo: [MS-OFORMS] 2.2.10.10.1
        self.check_value('SiteClassInfo (version)', '<H', 2, 0)
        class_table_size = self.unpack('<H', 2)
        self.read(class_table_size)

    def consume_FormObjectDepthTypeCount(self):
        """Read one FormObjectDepthTypeCount and return how many sites it covers."""
        # FormObjectDepthTypeCount: [MS-OFORMS] 2.2.10.7
        (_depth, mixed) = self.unpacks('<BB', 2)
        if mixed & 0x80:
            # High bit set: the low 7 bits are a count and the type follows.
            self.check_value('FormObjectDepthTypeCount (SITE_TYPE)', '<B', 1, 1)
            return mixed ^ 0x80
        if mixed != 1:
            raise OleFormParsingError(
                'Invalid FormObjectDepthTypeCount (SITE_TYPE) at {0}: expected 1 got {1}'.format(
                    self._pos - 2, str(mixed)))
        return 1

    def consume_OleSiteConcreteControl(self):
        """Parse one OleSiteConcreteControl and append its fields to ``content``.

        The appended dict has the keys 'name', 'tag', 'id', 'tabindex' and
        'ClsidCacheIndex'.
        """
        # OleSiteConcreteControl: [MS-OFORMS] 2.2.10.12.1
        self.check_value('OleSiteConcreteControl (version)', '<H', 2, 0)
        site_size = self.unpack('<H', 2)
        self.freeze()
        sitepropmask = SitePropMask(self.unpack('<L', 4))
        # SiteDataBlock: [MS-OFORMS] 2.2.10.12.3
        name_len = tag_len = site_id = 0
        if sitepropmask.fName:
            name_len = self.consume_CountOfBytesWithCompressionFlag()
        if sitepropmask.fTag:
            tag_len = self.consume_CountOfBytesWithCompressionFlag()
        if sitepropmask.fID:
            site_id = self.unpack('<L', 4)
        if sitepropmask.fHelpContextID:
            self.read(4)
        if sitepropmask.fBitFlags:
            self.read(4)
        if sitepropmask.fObjectStreamSize:
            self.read(4)
        tabindex = clsid_cache_index = 0
        # The following fields are 2 bytes each; pad to 4 bytes afterwards.
        self.freeze()
        if sitepropmask.fTabIndex:
            tabindex = self.unpack('<H', 2)
        if sitepropmask.fClsidCacheIndex:
            clsid_cache_index = self.unpack('<H', 2)
        if sitepropmask.fGroupID:
            self.read(2)
        self.unfreeze_pad()
        # For the next 4 entries, the documentation adds padding, but it should already be aligned??
        if sitepropmask.fControlTipText:
            self.read(4)
        if sitepropmask.fRuntimeLicKey:
            self.read(4)
        if sitepropmask.fControlSource:
            self.read(4)
        if sitepropmask.fRowSource:
            self.read(4)
        # SiteExtraDataBlock: [MS-OFORMS] 2.2.10.12.4
        name = self.read(name_len)
        tag = self.read(tag_len)
        self.content.append({'name': name, 'tag': tag, 'id': site_id,
                             'tabindex': tabindex,
                             'ClsidCacheIndex': clsid_cache_index})
        # Skip whatever is left of the structure.
        self.unfreeze(site_size)

    def consume_FormControl(self):
        """Parse a FormControl ('f' stream) and collect every site it declares."""
        # FormControl: [MS-OFORMS] 2.2.10.1
        self.check_values('FormControl (versions)', '<BB', 2, (0, 4))
        form_size = self.unpack('<H', 2)
        self.freeze()
        propmask = PropMask(self.unpack('<L', 4))
        # FormDataBlock: [MS-OFORMS] 2.2.10.3
        if propmask.fBackColor:
            self.read(4)
        if propmask.fForeColor:
            self.read(4)
        if propmask.fNextAvailableID:
            self.read(4)
        if propmask.fBooleanProperties:
            boolean_properties = self.unpack('<L', 4)
            dont_save_class_table = (boolean_properties & (1 << 15)) >> 15
        else:
            dont_save_class_table = 0
        # Skip the rest of DataBlock and ExtraDataBlock
        self.unfreeze(form_size)
        # FormStreamData: [MS-OFORMS] 2.2.10.5
        if propmask.fMouseIcon:
            self.consume_GuidAndPicture()
        if propmask.fFont:
            self.consume_GuidAndFont()
        if propmask.fPicture:
            self.consume_GuidAndPicture()
        # FormSiteData: [MS-OFORMS] 2.2.10.6
        if not dont_save_class_table:
            count_of_site_class_info = self.unpack('<H', 2)
            for _ in range(count_of_site_class_info):
                self.consume_SiteClassInfo()
        (count_of_sites, _count_of_bytes) = self.unpacks('<LL', 8)
        remaining_depths_and_types = count_of_sites
        self.freeze()
        while remaining_depths_and_types > 0:
            remaining_depths_and_types -= self.consume_FormObjectDepthTypeCount()
        self.unfreeze_pad()
        for _ in range(count_of_sites):
            self.consume_OleSiteConcreteControl()

    def consume_stream_o(self):
        """Walk the records of an 'o' stream and append their strings to ``content``.

        Parsing stops at the first record header that cannot be unpacked
        (typically the end of the stream).
        """
        # Adapted from plugin_stream_o.py from Didier Stevens's oledump.py
        while True:
            try:
                (code, length) = self.unpacks('<HH', 4)
            except struct.error:
                break
            self.freeze()
            if code == 0x200:
                fieldtype = self.unpack('<I', 4)
                if fieldtype == 0x80400101:
                    self.read(8)
                    # Length field with its top flag bit masked off.
                    string_length = self.unpack('<I', 4) & 0x7FFFFFFF
                    self.read(8)
                    self.content.append(self.read(string_length))
                elif fieldtype == 0x80000101:
                    self.content.append('')
            # Jump to the end of the record, whatever was consumed above.
            self.unfreeze(length)
  229 +
def OleFormVariables(ole_file, stream_dir):
    """Return the variables of one UserForm storage, each with its value.

    The 'f' stream under ``stream_dir`` provides one dict per control and the
    'o' stream provides one value per control; each value is stored under the
    'value' key of the matching dict.

    Raises OleFormParsingError when both streams do not yield the same
    number of entries.
    """
    form_parser = OleUserFormParser(
        ole_file.openstream('/'.join(stream_dir + ['f'])))
    form_parser.consume_FormControl()
    variables = form_parser.content

    value_parser = OleUserFormParser(
        ole_file.openstream('/'.join(stream_dir + ['o'])))
    value_parser.consume_stream_o()
    values = value_parser.content

    if len(variables) != len(values):
        raise OleFormParsingError('Incompatible number of variables: {0} VS {1}'.format(len(variables), len(values)))
    # Both lists have the same length here, so pairing them is lossless.
    for variable, value in zip(variables, values):
        variable['value'] = value
    return variables
... ...
oletools/olevba.py
... ... @@ -254,6 +254,8 @@ except ImportError:
254 254 + "see http://codespeak.net/lxml " \
255 255 + "or http://effbot.org/zone/element-index.htm")
256 256  
  257 +from oleform import OleFormVariables
  258 +
257 259 import thirdparty.olefile as olefile
258 260 from thirdparty.prettytable import prettytable
259 261 from thirdparty.xglob import xglob, PathNotFoundException
... ... @@ -2906,6 +2908,24 @@ class VBA_Parser(object):
2906 2908 log.debug('Printable string found in form: %r' % m.group())
2907 2909 yield (self.filename, '/'.join(o_stream), m.group())
2908 2910  
  2911 + def extract_form_strings_extended(self):
  2912 + if self.ole_file is None:
  2913 + # This may be either an OpenXML/PPT or a text file:
  2914 + if self.type == TYPE_TEXT:
  2915 + # This is a text file, return no results:
  2916 + return
  2917 + else:
  2918 + # OpenXML/PPT: recursively yield results from each OLE subfile:
  2919 + for ole_subfile in self.ole_subfiles:
  2920 + for results in ole_subfile.extract_form_strings_extended():
  2921 + yield results
  2922 + else:
  2923 + # This is an OLE file:
  2924 + self.find_vba_forms()
  2925 + ole = self.ole_file
  2926 + for form_storage in self.vba_forms:
  2927 + for variable in OleFormVariables(ole, form_storage):
  2928 + yield (self.filename, '/'.join(form_storage), variable)
2909 2929  
2910 2930 def close(self):
2911 2931 """
... ... @@ -3035,6 +3055,11 @@ class VBA_Parser_CLI(VBA_Parser):
3035 3055 print('VBA FORM STRING IN %r - OLE stream: %r' % (subfilename, stream_path))
3036 3056 print('- ' * 39)
3037 3057 print(form_string)
  3058 + for (subfilename, stream_path, form_variables) in self.extract_form_strings_extended():
  3059 + print('-' * 79)
  3060 + print('VBA FORM Variable "%s" IN %r - OLE stream: %r' % (form_variables['name'], subfilename, stream_path))
  3061 + print('- ' * 39)
  3062 + print(str(form_variables['value']))
3038 3063 if not vba_code_only:
3039 3064 # analyse the code from all modules at once:
3040 3065 self.print_analysis(show_decoded_strings, deobfuscate)
... ...