Commit dbd2a780c8e9d47f7bad0093def80fb365755358
1 parent
5525fef1
WIP: Extract variable name and value from UserForm
This is a very early draft of a new feature: parsing UserForms. No guarantees are attached (it is likely to fail on code paths I did not test).
Showing
2 changed files
with
268 additions
and
0 deletions
oletools/oleform.py
0 → 100644
| 1 | +#!/usr/bin/env python | ||
| 2 | + | ||
| 3 | +import struct | ||
| 4 | + | ||
class OleFormParsingError(Exception):
    """Raised when a UserForm stream does not match the expected layout."""
| 7 | + | ||
class Mask(object):
    """Expose the 32 bits of an integer as named 0/1 attributes.

    Subclasses provide ``_names``: a sequence of 32 flag names ordered from
    the least-significant bit upwards.  ``mask.<name>`` then yields the value
    (0 or 1) of the corresponding bit, and ``str(mask)`` lists the names of
    the bits that are set.
    """

    # Overridden by subclasses; defined here so that looking it up on the base
    # class can never fall through to __getattr__.
    _names = ()

    def __init__(self, val):
        """Split the integer ``val`` into its 32 low-order bits."""
        self._val = [(val >> bit) & 1 for bit in range(32)]

    def __str__(self):
        """Return the comma-separated names of all set bits."""
        return ', '.join(name for name, bit in zip(self._names, self._val) if bit)

    def __getattr__(self, name):
        """Return the bit (0 or 1) registered under ``name``.

        Raises:
            AttributeError: if ``name`` is not one of the flag names.
        """
        # __getattr__ only runs after normal lookup failed.  Private names are
        # never flags; refusing them here also prevents endless recursion when
        # ``_val`` has not been set yet (e.g. while copying or unpickling).
        if name.startswith('_'):
            raise AttributeError(name)
        try:
            position = self._names.index(name)
        except ValueError:
            # Unknown flag: report it the way attribute access is expected to,
            # so that hasattr() and getattr(obj, name, default) keep working.
            raise AttributeError('{0} has no flag named {1!r}'.format(type(self).__name__, name))
        return self._val[position]
| 17 | + | ||
class PropMask(Mask):
    """Flag names for the property mask read by the FormControl parser.

    Names are listed from the least-significant bit upwards, following the
    FormPropMask layout of [MS-OFORMS] 2.2.10.2.
    """
    # Bit 7 is fBorderStyle in the specification; it was previously listed as
    # a second 'fBooleanProperties', which mislabelled it in str() output and
    # made the flag unreachable by name.
    _names = ['Unused1', 'fBackColor', 'fForeColor', 'fNextAvailableID', 'Unused2_0', 'Unused2_1',
              'fBooleanProperties', 'fBorderStyle', 'fMousePointer', 'fScrollBars',
              'fDisplayedSize', 'fLogicalSize', 'fScrollPosition', 'fGroupCnt', 'Reserved',
              'fMouseIcon', 'fCycle', 'fSpecialEffect', 'fBorderColor', 'fCaption', 'fFont',
              'fPicture', 'fZoom', 'fPictureAlignment', 'fPictureTiling', 'fPictureSizeMode',
              'fShapeCookie', 'fDrawBuffer', 'Unused3_0', 'Unused3_1', 'Unused3_2', 'Unused3_3']
| 25 | + | ||
class SitePropMask(Mask):
    """Flag names for the property mask of an OleSiteConcreteControl.

    Names are listed from the least-significant bit upwards: fifteen named
    bits followed by seventeen unused ones.
    """
    _names = [
        'fName',
        'fTag',
        'fID',
        'fHelpContextID',
        'fBitFlags',
        'fObjectStreamSize',
        'fTabIndex',
        'fClsidCacheIndex',
        'fPosition',
        'fGroupID',
        'Unused1',
        'fControlTipText',
        'fRuntimeLicKey',
        'fControlSource',
        'fRowSource',
    ] + ['Unused2_%d' % index for index in range(17)]
| 33 | + | ||
class OleUserFormParser(object):
    """Sequential reader for the binary streams of a VBA UserForm ([MS-OFORMS]).

    The parser wraps a file-like object exposing ``read(size)`` and walks it
    front to back.  ``consume_FormControl`` parses a form's 'f' stream and
    appends one dict per embedded control to ``content``;
    ``consume_stream_o`` parses an 'o' stream and appends one value per
    recognised record.
    """

    def __init__(self, stream):
        """Wrap ``stream``; its current position is counted as offset 0."""
        self.content = []        # parsed results, in stream order
        self._stream = stream
        self._pos = 0            # number of bytes requested so far
        self._frozen_pos = []    # stack of positions saved by freeze()

    def read(self, size):
        """Read ``size`` bytes from the stream and advance the position counter.

        The counter moves by ``size`` even when the stream returns fewer
        bytes; a short read then surfaces as struct.error in unpacks().
        """
        self._pos += size
        return self._stream.read(size)

    def freeze(self):
        """Push the current position so a later unfreeze*() can refer to it."""
        self._frozen_pos.append(self._pos)

    def unfreeze(self, size):
        """Pop the last frozen position and skip to ``size`` bytes past it.

        NOTE(review): if more than ``size`` bytes were already consumed since
        freeze(), the computed length is negative and is handed to the
        stream's read() unchanged.
        """
        self.read(self._frozen_pos.pop() - self._pos + size)

    def unfreeze_pad(self):
        """Pop the last frozen position and skip padding up to a 4-byte boundary.

        The boundary is measured relative to the popped position.
        """
        misalignment = (self._pos - self._frozen_pos.pop()) % 4
        if misalignment:
            self.read(4 - misalignment)

    def unpacks(self, format, size):
        """Read ``size`` bytes and return them unpacked with ``format`` as a tuple.

        Raises:
            struct.error: if the bytes read do not match ``format``.
        """
        return struct.unpack(format, self.read(size))

    def unpack(self, format, size):
        """Like unpacks(), but return only the first unpacked value."""
        return self.unpacks(format, size)[0]

    def check_values(self, name, format, size, expected):
        """Unpack several values and require them to equal the tuple ``expected``.

        Raises:
            OleFormParsingError: if the unpacked tuple differs from ``expected``.
        """
        value = self.unpacks(format, size)
        if value != expected:
            raise OleFormParsingError('Invalid {0} at {1}: expected {2} got {3}'.format(name, self._pos - size, str(expected), str(value)))

    def check_value(self, name, format, size, expected):
        """Unpack a single value and require it to equal ``expected``."""
        self.check_values(name, format, size, (expected,))

    def consume_GuidAndFont(self):
        """Skip a GuidAndFont structure, either a StdFont or a TextProps.

        Raises:
            OleFormParsingError: on an unknown GUID or an unexpected version.
        """
        # GuidAndFont: [MS-OFORMS] 2.4.7
        guid = self.unpacks('<LHH', 8) + self.unpacks('>Q', 8)
        if guid == (199447043, 36753, 4558, 11376937813817407569):
            # UUID == {0BE35203-8F91-11CE-9DE300AA004BB851}
            # StdFont: [MS-OFORMS] 2.4.12
            self.check_value('StdFont (version)', '<B', 1, 1)
            # Skip sCharset, bFlags, sWeight, ulHeight
            self.read(9)
            bFaceLen = self.unpack('<B', 1)
            self.read(bFaceLen)
        elif guid == (2948729120, 55886, 4558, 13349514450607572916):
            # UUID == {AFC20920-DA4E-11CE-B94300AA006887B4}
            # TextProps: [MS-OFORMS] 2.3.1
            # Two bytes are unpacked here, so the comparison must be made
            # against the pair itself (check_values), not a wrapped 1-tuple.
            self.check_values('TextProps (versions)', '<BB', 2, (0, 2))
            cbTextProps = self.unpack('<H', 2)
            self.read(cbTextProps)
        else:
            raise OleFormParsingError('Invalid GuidAndFont at {0}: UUID'.format(self._pos - 16))

    def consume_GuidAndPicture(self):
        """Skip a GuidAndPicture structure (GUID followed by a StdPicture).

        Raises:
            OleFormParsingError: on an unexpected GUID or picture preamble.
        """
        # GuidAndPicture: [MS-OFORMS] 2.4.8
        # UUID == {0BE35204-8F91-11CE-9DE3-00AA004BB851}
        self.check_values('GuidAndPicture (UUID part 1)', '<LHH', 8, (199447044, 36753, 4558))
        self.check_value('GuidAndPicture (UUID part 2)', '>Q', 8, 11376937813817407569)
        # StdPicture: [MS-OFORMS] 2.4.13
        self.check_value('StdPicture (Preamble)', '<L', 4, 0x0000746C)
        size = self.unpack('<L', 4)
        self.read(size)

    def consume_CountOfBytesWithCompressionFlag(self):
        """Read a 4-byte length field and return the length without its flag bit.

        Raises:
            OleFormParsingError: if the length is non-zero and the top
                (compression) bit is not set.
        """
        # CountOfBytesWithCompressionFlag or CountOfCharsWithCompressionFlag: [MS-OFORMS] 2.4.14.2 or 2.4.14.3
        count = self.unpack('<L', 4)
        if not count & 0x80000000 and count != 0:
            raise OleFormParsingError('Uncompress string length at {0}'.format(self._pos - 4))
        return count & 0x7FFFFFFF

    def consume_SiteClassInfo(self):
        """Skip one SiteClassInfo structure.

        Raises:
            OleFormParsingError: if the version field is not 0.
        """
        # SiteClassInfo: [MS-OFORMS] 2.2.10.10.1
        self.check_value('SiteClassInfo (version)', '<H', 2, 0)
        cbClassTable = self.unpack('<H', 2)
        self.read(cbClassTable)

    def consume_FormObjectDepthTypeCount(self):
        """Read one FormObjectDepthTypeCount and return how many sites it covers.

        Raises:
            OleFormParsingError: if the site type is not 1.
        """
        # FormObjectDepthTypeCount: [MS-OFORMS] 2.2.10.7
        (depth, mixed) = self.unpacks('<BB', 2)
        if mixed & 0x80:
            # Top bit set: the low 7 bits are a count and the type follows.
            self.check_value('FormObjectDepthTypeCount (SITE_TYPE)', '<B', 1, 1)
            return mixed ^ 0x80
        if mixed != 1:
            raise OleFormParsingError('Invalid FormObjectDepthTypeCount (SITE_TYPE) at {0}: expected 1 got {1}'.format(self._pos - 2, str(mixed)))
        return 1

    def consume_OleSiteConcreteControl(self):
        """Parse one OleSiteConcreteControl and append its summary to ``content``.

        The appended dict has the keys 'name', 'tag', 'id', 'tabindex' and
        'ClsidCacheIndex'; fields absent from the property mask stay at 0
        (or an empty read for name/tag).

        Raises:
            OleFormParsingError: on an unexpected version or length field.
        """
        # OleSiteConcreteControl: [MS-OFORMS] 2.2.10.12.1
        self.check_value('OleSiteConcreteControl (version)', '<H', 2, 0)
        cbSite = self.unpack('<H', 2)
        # Outer freeze: cbSite is counted from here; popped by unfreeze() below.
        self.freeze()
        sitepropmask = SitePropMask(self.unpack('<L', 4))
        # SiteDataBlock: [MS-OFORMS] 2.2.10.12.3
        name_len = tag_len = site_id = 0
        if sitepropmask.fName:
            name_len = self.consume_CountOfBytesWithCompressionFlag()
        if sitepropmask.fTag:
            tag_len = self.consume_CountOfBytesWithCompressionFlag()
        if sitepropmask.fID:
            site_id = self.unpack('<L', 4)
        if sitepropmask.fHelpContextID:
            self.read(4)
        if sitepropmask.fBitFlags:
            self.read(4)
        if sitepropmask.fObjectStreamSize:
            self.read(4)
        tabindex = ClsidCacheIndex = 0
        # Inner freeze: the 2-byte fields below are padded to 4 bytes as a group.
        self.freeze()
        if sitepropmask.fTabIndex:
            tabindex = self.unpack('<H', 2)
        if sitepropmask.fClsidCacheIndex:
            ClsidCacheIndex = self.unpack('<H', 2)
        if sitepropmask.fGroupID:
            self.read(2)
        self.unfreeze_pad()
        # For the next 4 entries, the documentation adds padding, but it should already be aligned??
        if sitepropmask.fControlTipText:
            self.read(4)
        if sitepropmask.fRuntimeLicKey:
            self.read(4)
        if sitepropmask.fControlSource:
            self.read(4)
        if sitepropmask.fRowSource:
            self.read(4)
        # SiteExtraDataBlock: [MS-OFORMS] 2.2.10.12.4
        # NOTE(review): name and tag are read back to back; confirm against the
        # specification whether padding can sit between them.
        name = self.read(name_len)
        tag = self.read(tag_len)
        self.content.append({'name': name, 'tag': tag, 'id': site_id,
                             'tabindex': tabindex,
                             'ClsidCacheIndex': ClsidCacheIndex})
        # Skip whatever remains of the cbSite bytes.
        self.unfreeze(cbSite)

    def consume_FormControl(self):
        """Parse a FormControl and every site (embedded control) it lists.

        One dict per site is appended to ``content`` by
        consume_OleSiteConcreteControl().

        Raises:
            OleFormParsingError: on any structural mismatch.
        """
        # FormControl: [MS-OFORMS] 2.2.10.1
        self.check_values('FormControl (versions)', '<BB', 2, (0, 4))
        cbform = self.unpack('<H', 2)
        self.freeze()
        propmask = PropMask(self.unpack('<L', 4))
        # FormDataBlock: [MS-OFORMS] 2.2.10.3
        if propmask.fBackColor:
            self.read(4)
        if propmask.fForeColor:
            self.read(4)
        if propmask.fNextAvailableID:
            self.read(4)
        if propmask.fBooleanProperties:
            BooleanProperties = self.unpack('<L', 4)
            FORM_FLAG_DONTSAVECLASSTABLE = (BooleanProperties & (1<<15)) >> 15
        else:
            FORM_FLAG_DONTSAVECLASSTABLE = 0
        # Skip the rest of DataBlock and ExtraDataBlock
        self.unfreeze(cbform)
        # FormStreamData: [MS-OFORMS] 2.2.10.5
        if propmask.fMouseIcon:
            self.consume_GuidAndPicture()
        if propmask.fFont:
            self.consume_GuidAndFont()
        if propmask.fPicture:
            self.consume_GuidAndPicture()
        # FormSiteData: [MS-OFORMS] 2.2.10.6
        if not FORM_FLAG_DONTSAVECLASSTABLE:
            CountOfSiteClassInfo = self.unpack('<H', 2)
            for _ in range(CountOfSiteClassInfo):
                self.consume_SiteClassInfo()
        (CountOfSites, CountOfBytes) = self.unpacks('<LL', 8)
        remaining_SiteDepthsAndTypes = CountOfSites
        # The depth/type entries are padded to 4 bytes as a group.
        self.freeze()
        while remaining_SiteDepthsAndTypes > 0:
            remaining_SiteDepthsAndTypes -= self.consume_FormObjectDepthTypeCount()
        self.unfreeze_pad()
        for _ in range(CountOfSites):
            self.consume_OleSiteConcreteControl()

    def consume_stream_o(self):
        """Walk the records of an 'o' stream and collect their string values.

        Each record starts with a 2-byte code and a 2-byte length.  For code
        0x200 the 4-byte field type decides what is appended to ``content``:
        0x80400101 appends the embedded string, 0x80000101 appends ''.
        Parsing stops at the first header that cannot be unpacked.
        """
        # Adapted from plugin_stream_o.py from Didier Stevens's oledump.py
        while True:
            try:
                (code, length) = self.unpacks('<HH', 4)
            except struct.error:
                # Fewer than 4 bytes left: end of stream.
                break
            self.freeze()
            if code == 0x200:
                fieldtype = self.unpack('<I', 4)
                if fieldtype == 0x80400101:
                    self.read(8)
                    # Drop the top (flag) bit of the length field.
                    lengthString = self.unpack('<I', 4) & 0x7FFFFFFF
                    self.read(8)
                    self.content.append(self.read(lengthString))
                elif fieldtype == 0x80000101:
                    self.content.append('')
            # Jump to the end of the record, whatever was consumed above.
            self.unfreeze(length)
| 229 | + | ||
def OleFormVariables(ole_file, stream_dir):
    """Return the controls of one UserForm storage together with their values.

    The storage's 'f' stream is parsed for the control descriptions and its
    'o' stream for the values; the two lists are paired by position and each
    control dict gains a 'value' key.

    Args:
        ole_file: object providing ``openstream(path)``.
        stream_dir: list of path components of the form storage.

    Returns:
        The list of control dicts, each extended with 'value'.

    Raises:
        OleFormParsingError: if the two streams yield different numbers of
            entries, or if either stream fails to parse.
    """
    form_parser = OleUserFormParser(ole_file.openstream('/'.join(stream_dir + ['f'])))
    form_parser.consume_FormControl()
    controls = form_parser.content

    object_parser = OleUserFormParser(ole_file.openstream('/'.join(stream_dir + ['o'])))
    object_parser.consume_stream_o()
    contents = object_parser.content

    if len(controls) != len(contents):
        raise OleFormParsingError('Incompatible number of variables: {0} VS {1}'.format(len(controls), len(contents)))
    # Lengths are equal here, so zip() pairs every control with its value.
    for control, content in zip(controls, contents):
        control['value'] = content
    return controls
oletools/olevba.py
| @@ -254,6 +254,8 @@ except ImportError: | @@ -254,6 +254,8 @@ except ImportError: | ||
| 254 | + "see http://codespeak.net/lxml " \ | 254 | + "see http://codespeak.net/lxml " \ |
| 255 | + "or http://effbot.org/zone/element-index.htm") | 255 | + "or http://effbot.org/zone/element-index.htm") |
| 256 | 256 | ||
| 257 | +from oleform import OleFormVariables | ||
| 258 | + | ||
| 257 | import thirdparty.olefile as olefile | 259 | import thirdparty.olefile as olefile |
| 258 | from thirdparty.prettytable import prettytable | 260 | from thirdparty.prettytable import prettytable |
| 259 | from thirdparty.xglob import xglob, PathNotFoundException | 261 | from thirdparty.xglob import xglob, PathNotFoundException |
| @@ -2906,6 +2908,24 @@ class VBA_Parser(object): | @@ -2906,6 +2908,24 @@ class VBA_Parser(object): | ||
| 2906 | log.debug('Printable string found in form: %r' % m.group()) | 2908 | log.debug('Printable string found in form: %r' % m.group()) |
| 2907 | yield (self.filename, '/'.join(o_stream), m.group()) | 2909 | yield (self.filename, '/'.join(o_stream), m.group()) |
| 2908 | 2910 | ||
def extract_form_strings_extended(self):
    """Yield (filename, form storage path, variable dict) for every form control.

    For an OLE file, each storage found by find_vba_forms() is parsed with
    OleFormVariables().  Without an OLE file, text input yields nothing and
    any other type delegates to each entry of ``ole_subfiles``.
    """
    if self.ole_file is not None:
        # This is an OLE file:
        self.find_vba_forms()
        container = self.ole_file
        for storage in self.vba_forms:
            for variable in OleFormVariables(container, storage):
                yield (self.filename, '/'.join(storage), variable)
        return
    # This may be either an OpenXML/PPT or a text file:
    if self.type == TYPE_TEXT:
        # This is a text file, return no results:
        return
    # OpenXML/PPT: recursively yield results from each OLE subfile:
    for subfile in self.ole_subfiles:
        for item in subfile.extract_form_strings_extended():
            yield item
| 2909 | 2929 | ||
| 2910 | def close(self): | 2930 | def close(self): |
| 2911 | """ | 2931 | """ |
| @@ -3035,6 +3055,11 @@ class VBA_Parser_CLI(VBA_Parser): | @@ -3035,6 +3055,11 @@ class VBA_Parser_CLI(VBA_Parser): | ||
| 3035 | print('VBA FORM STRING IN %r - OLE stream: %r' % (subfilename, stream_path)) | 3055 | print('VBA FORM STRING IN %r - OLE stream: %r' % (subfilename, stream_path)) |
| 3036 | print('- ' * 39) | 3056 | print('- ' * 39) |
| 3037 | print(form_string) | 3057 | print(form_string) |
| 3058 | + for (subfilename, stream_path, form_variables) in self.extract_form_strings_extended(): | ||
| 3059 | + print('-' * 79) | ||
| 3060 | + print('VBA FORM Variable "%s" IN %r - OLE stream: %r' % (form_variables['name'], subfilename, stream_path)) | ||
| 3061 | + print('- ' * 39) | ||
| 3062 | + print(str(form_variables['value'])) | ||
| 3038 | if not vba_code_only: | 3063 | if not vba_code_only: |
| 3039 | # analyse the code from all modules at once: | 3064 | # analyse the code from all modules at once: |
| 3040 | self.print_analysis(show_decoded_strings, deobfuscate) | 3065 | self.print_analysis(show_decoded_strings, deobfuscate) |