Commit 7909381f67d8b4d5e43e26ce4bd5fc53678b24ee
1 parent
d0d4c87f
merged PR #124
Showing
3 changed files
with
321 additions
and
4 deletions
oletools/oleform.py
0 → 100644
| 1 | +#!/usr/bin/env python | |
| 2 | + | |
| 3 | +import struct | |
| 4 | + | |
class OleFormParsingError(Exception):
    """Error raised when an OLE form stream cannot be parsed."""
| 7 | + | |
class Mask(object):
    """
    Base class decoding an integer bit field into named one-bit flags.

    Subclasses must define two class attributes:

    - ``_size``: number of bits to decode, least significant bit first
    - ``_names``: list of flag names, ``_names[i]`` being the name of bit ``i``

    A flag can be read as an attribute (``mask.fName``) or as an item
    (``mask['fName']``); both give 0 or 1.
    """

    def __init__(self, val):
        # _val[i] holds the value (0 or 1) of bit i of val
        self._val = [(val >> i) & 1 for i in range(self._size)]

    def __str__(self):
        """Return the comma-separated names of all the flags that are set."""
        return ', '.join(self._names[i] for i in range(self._size) if self._val[i])

    def __getattr__(self, name):
        """
        Return the value (0 or 1) of the flag called ``name``.

        Raises AttributeError when ``name`` is not a known flag, as required
        by the attribute protocol (hasattr, getattr with a default, copy...).
        """
        # __getattr__ only runs when the regular lookup failed. Private names
        # are never flags: resolving them here would recurse forever when
        # _names or _val is itself missing.
        if name.startswith('_'):
            raise AttributeError(name)
        try:
            return self._val[self._names.index(name)]
        except ValueError:
            raise AttributeError(name)

    def __len__(self):
        """Return the number of bits decoded by this mask."""
        return self._size

    def __getitem__(self, key):
        """
        Return the value (0 or 1) of the flag called ``key``.

        Raises ValueError (from list.index) when ``key`` is not a known flag.
        """
        return self._val[self._names.index(key)]
| 23 | + | |
class FormPropMask(Mask):
    """FormPropMask: [MS-OFORMS] 2.2.10.2"""
    _size = 28
    # One name per bit, least significant bit first. The two-bit Unused2
    # field is split into Unused2_0 and Unused2_1 so that every bit has a name.
    _names = [
        'Unused1', 'fBackColor', 'fForeColor', 'fNextAvailableID',
        'Unused2_0', 'Unused2_1',
        # bit 7 is fBorderStyle (it used to be a second 'fBooleanProperties',
        # which made str() print the wrong name for that bit)
        'fBooleanProperties', 'fBorderStyle', 'fMousePointer', 'fScrollBars',
        'fDisplayedSize', 'fLogicalSize', 'fScrollPosition', 'fGroupCnt', 'Reserved',
        'fMouseIcon', 'fCycle', 'fSpecialEffect', 'fBorderColor', 'fCaption', 'fFont',
        'fPicture', 'fZoom', 'fPictureAlignment', 'fPictureTiling', 'fPictureSizeMode',
        'fShapeCookie', 'fDrawBuffer',
    ]
| 33 | + | |
class SitePropMask(Mask):
    """SitePropMask: [MS-OFORMS] 2.2.10.12.2"""
    _size = 15
    # One name per bit, least significant bit first.
    _names = [
        'fName',               # bit 0
        'fTag',                # bit 1
        'fID',                 # bit 2
        'fHelpContextID',      # bit 3
        'fBitFlags',           # bit 4
        'fObjectStreamSize',   # bit 5
        'fTabIndex',           # bit 6
        'fClsidCacheIndex',    # bit 7
        'fPosition',           # bit 8
        'fGroupID',            # bit 9
        'Unused1',             # bit 10
        'fControlTipText',     # bit 11
        'fRuntimeLicKey',      # bit 12
        'fControlSource',      # bit 13
        'fRowSource',          # bit 14
    ]
| 40 | + | |
class MorphDataPropMask(Mask):
    """MorphDataPropMask: [MS-OFORMS] 2.2.5.2"""
    _size = 33
    # One name per bit, least significant bit first.
    _names = [
        # bits 0-7
        'fVariousPropertyBits', 'fBackColor', 'fForeColor', 'fMaxLength',
        'fBorderStyle', 'fScrollBars', 'fDisplayStyle', 'fMousePointer',
        # bits 8-15
        'fSize', 'fPasswordChar', 'fListWidth', 'fBoundColumn',
        'fTextColumn', 'fColumnCount', 'fListRows', 'fcColumnInfo',
        # bits 16-23
        'fMatchEntry', 'fListStyle', 'fShowDropButtonWhen', 'UnusedBits1',
        'fDropButtonStyle', 'fMultiSelect', 'fValue', 'fCaption',
        # bits 24-31
        'fPicturePosition', 'fBorderColor', 'fSpecialEffect', 'fMouseIcon',
        'fPicture', 'fAccelerator', 'UnusedBits2', 'Reserved',
        # bit 32
        'fGroupName',
    ]
| 51 | + | |
class ExtendedStream(object):
    """
    Wrapper around a readable binary stream that tracks the read position
    and can skip to the end of a block or to an alignment boundary.

    Skipping is done through the ``with`` statement::

        with stream.will_jump_to(block_size):
            ...  # on normal exit, the unread rest of the block is skipped
        with stream.will_pad():
            ...  # on normal exit, padding up to a multiple of 4 is skipped

    Both are measured from the position the stream had when entering the
    ``with`` block, and such blocks can be nested.
    """

    def __init__(self, stream, path):
        """
        :param stream: object with a ``read(size)`` method
        :param path: name of the stream, only used in error messages
        """
        self._pos = 0
        # stack of (start position, is_jump, size), one entry per open 'with'
        self._jumps = []
        # (is_jump, size) recorded by will_jump_to/will_pad for the next 'with'
        self._next_jump = None
        self._stream = stream
        self._path = path

    @classmethod
    def open(cls, ole_file, path):
        """Open the stream ``path`` of ``ole_file`` (through its openstream method)."""
        stream = ole_file.openstream(path)
        return cls(stream, path)

    def read(self, size):
        """Read ``size`` bytes and advance the tracked position by ``size``."""
        self._pos += size
        return self._stream.read(size)

    def will_jump_to(self, size):
        """Prepare a ``with`` block ending ``size`` bytes after its start."""
        self._next_jump = (True, size)
        return self

    def will_pad(self, pad=4):
        """Prepare a ``with`` block whose length is padded to a multiple of ``pad``."""
        self._next_jump = (False, pad)
        return self

    def __enter__(self):
        (jump_type, size) = self._next_jump
        self._jumps.append((self._pos, jump_type, size))
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # Always unwind the stack so that it stays consistent, even when an
        # exception is going through.
        (start, jump_type, size) = self._jumps.pop()
        if exc_type is not None:
            # Do not skip anything: let the exception propagate.
            return
        consumed = self._pos - start
        if jump_type:
            if consumed > size:
                # read() with a negative size would silently swallow the
                # rest of the stream: report the malformed block instead.
                self.raise_error('Block overrun: read {0} bytes of a {1}-byte block'.format(
                    consumed, size))
            self.read(size - consumed)
        else:
            align = consumed % size
            if align:
                self.read(size - align)

    def unpacks(self, format, size):
        """Read ``size`` bytes and return them unpacked with struct as a tuple."""
        return struct.unpack(format, self.read(size))

    def unpack(self, format, size):
        """Read ``size`` bytes and return the first value unpacked with struct."""
        return self.unpacks(format, size)[0]

    def raise_error(self, reason, back=0):
        """
        Raise OleFormParsingError with a 'path:position: reason' message.

        :param reason: description of the problem
        :param back: number of bytes to subtract from the current position,
            to point at the start of the offending data
        """
        raise OleFormParsingError('{0}:{1}: {2}'.format(self._path, self._pos - back, reason))

    def check_values(self, name, format, size, expected):
        """Unpack ``size`` bytes and raise an error unless they equal the tuple ``expected``."""
        value = self.unpacks(format, size)
        if value != expected:
            self.raise_error('Invalid {0}: expected {1} got {2}'.format(name, str(expected), str(value)))

    def check_value(self, name, format, size, expected):
        """Same as check_values, for a single expected value."""
        self.check_values(name, format, size, (expected,))
| 106 | + | |
| 107 | + | |
def consume_TextProps(stream):
    """
    Skip a TextProps structure: [MS-OFORMS] 2.3.1

    Checks the two version bytes (0, 2), then reads the 16-bit size
    (cbTextProps) and skips that many bytes.
    """
    stream.check_values('TextProps (versions)', '<BB', 2, (0, 2))
    stream.read(stream.unpack('<H', 2))
| 113 | + | |
def consume_GuidAndFont(stream):
    """
    Skip a GuidAndFont structure: [MS-OFORMS] 2.4.7

    The 16-byte GUID selects what follows: a StdFont ([MS-OFORMS] 2.4.12)
    or a TextProps ([MS-OFORMS] 2.3.1). Any other GUID is reported through
    stream.raise_error.
    """
    # The GUID is compared as (Data1, Data2, Data3, last 8 bytes as a
    # big-endian integer). Plain integer literals are used: the 'L' suffix
    # is a syntax error on Python 3 and is not needed on Python 2.
    guid = stream.unpacks('<LHH', 8) + stream.unpacks('>Q', 8)
    if guid == (199447043, 36753, 4558, 11376937813817407569):
        # UUID == {0BE35203-8F91-11CE-9DE300AA004BB851}
        # StdFont: [MS-OFORMS] 2.4.12
        stream.check_value('StdFont (version)', '<B', 1, 1)
        # Skip sCharset, bFlags, sWeight, ulHeight
        stream.read(9)
        bFaceLen = stream.unpack('<B', 1)
        stream.read(bFaceLen)
    elif guid == (2948729120, 55886, 4558, 13349514450607572916):
        # UUID == {AFC20920-DA4E-11CE-B94300AA006887B4}
        consume_TextProps(stream)
    else:
        # back=16: point at the start of the GUID
        stream.raise_error('Invalid GuidAndFont (UUID)', 16)
| 130 | + | |
def consume_GuidAndPicture(stream):
    """
    Skip a GuidAndPicture structure: [MS-OFORMS] 2.4.8

    Checks the GUID and the StdPicture preamble, then reads the 32-bit
    picture size and skips that many bytes.
    """
    # UUID == {0BE35204-8F91-11CE-9DE3-00AA004BB851}
    # Plain integer literals: the 'L' suffix is a syntax error on Python 3
    # and is not needed on Python 2.
    stream.check_values('GuidAndPicture (UUID part 1)', '<LHH', 8, (199447044, 36753, 4558))
    stream.check_value('GuidAndPicture (UUID part 2)', '>Q', 8, 11376937813817407569)
    # StdPicture: [MS-OFORMS] 2.4.13
    stream.check_value('StdPicture (Preamble)', '<L', 4, 0x0000746C)
    size = stream.unpack('<L', 4)
    stream.read(size)
| 140 | + | |
def consume_CountOfBytesWithCompressionFlag(stream):
    """
    Read a CountOfBytesWithCompressionFlag or CountOfCharsWithCompressionFlag:
    [MS-OFORMS] 2.4.14.2 or 2.4.14.3

    Returns the count stored in the low 31 bits. A non-zero count whose top
    bit (compression flag) is not set is reported through stream.raise_error.
    """
    count = stream.unpack('<L', 4)
    if not count & 0x80000000 and count != 0:
        # back=4: point at the start of the count
        stream.raise_error('Uncompress string length', 4)
    return count & 0x7FFFFFFF
| 147 | + | |
def consume_SiteClassInfo(stream):
    """
    Skip a SiteClassInfo structure: [MS-OFORMS] 2.2.10.10.1

    Checks the 16-bit version (0), then reads the 16-bit size
    (cbClassTable) and skips that many bytes.
    """
    stream.check_value('SiteClassInfo (version)', '<H', 2, 0)
    stream.read(stream.unpack('<H', 2))
| 153 | + | |
def consume_FormObjectDepthTypeCount(stream):
    """
    Read a FormObjectDepthTypeCount: [MS-OFORMS] 2.2.10.7

    Returns the number of sites described by the entry: 1 when the top bit
    of the second byte is clear (that byte must then be 1), otherwise the
    low 7 bits of that byte (a third byte is then read and must be 1).
    """
    (_depth, type_or_count) = stream.unpacks('<BB', 2)
    if not type_or_count & 0x80:
        # No count: the byte is the site type itself
        if type_or_count != 1:
            stream.raise_error('Invalid FormObjectDepthTypeCount (SITE_TYPE): expected 1 got {0}'.format(str(type_or_count)))
        return 1
    # Count in the low 7 bits, followed by the site type
    stream.check_value('FormObjectDepthTypeCount (SITE_TYPE)', '<B', 1, 1)
    return type_or_count ^ 0x80
| 163 | + | |
def consume_OleSiteConcreteControl(stream):
    """
    Parse an OleSiteConcreteControl: [MS-OFORMS] 2.2.10.12.1

    Returns a dict with the keys 'name' and 'tag' (raw bytes read from the
    stream), 'id', 'tabindex' and 'ClsidCacheIndex' (integers, 0 when the
    property is not present in the property mask).
    """
    stream.check_value('OleSiteConcreteControl (version)', '<H', 2, 0)
    site_size = stream.unpack('<H', 2)
    with stream.will_jump_to(site_size):
        flags = SitePropMask(stream.unpack('<L', 4))

        # SiteDataBlock: [MS-OFORMS] 2.2.10.12.3
        name_len = consume_CountOfBytesWithCompressionFlag(stream) if flags.fName else 0
        tag_len = consume_CountOfBytesWithCompressionFlag(stream) if flags.fTag else 0
        site_id = stream.unpack('<L', 4) if flags.fID else 0
        for flag_name in ('fHelpContextID', 'fBitFlags', 'fObjectStreamSize'):
            if flags[flag_name]:
                stream.read(4)
        tab_index = 0
        clsid_cache_index = 0
        # The three 2-byte properties are followed by padding to 4 bytes
        with stream.will_pad():
            if flags.fTabIndex:
                tab_index = stream.unpack('<H', 2)
            if flags.fClsidCacheIndex:
                clsid_cache_index = stream.unpack('<H', 2)
            if flags.fGroupID:
                stream.read(2)
        # For the next 4 entries, the documentation adds padding, but it should already be aligned??
        for flag_name in ('fControlTipText', 'fRuntimeLicKey', 'fControlSource', 'fRowSource'):
            if flags[flag_name]:
                stream.read(4)

        # SiteExtraDataBlock: [MS-OFORMS] 2.2.10.12.4
        name = stream.read(name_len)
        tag = stream.read(tag_len)
    # Leaving the 'with' block skipped whatever was left of the site
    return {
        'name': name,
        'tag': tag,
        'id': site_id,
        'tabindex': tab_index,
        'ClsidCacheIndex': clsid_cache_index,
    }
| 198 | + | |
def consume_FormControl(stream):
    """
    Parse a FormControl ([MS-OFORMS] 2.2.10.1) and yield its sites.

    Generator: yields the dict returned by consume_OleSiteConcreteControl
    for each of the sites declared by the form.
    """
    stream.check_values('FormControl (versions)', '<BB', 2, (0, 4))
    form_size = stream.unpack('<H', 2)
    with stream.will_jump_to(form_size):
        flags = FormPropMask(stream.unpack('<L', 4))
        # FormDataBlock: [MS-OFORMS] 2.2.10.3
        for flag_name in ('fBackColor', 'fForeColor', 'fNextAvailableID'):
            if flags[flag_name]:
                stream.read(4)
        # Bit 15 of BooleanProperties is FORM_FLAG_DONTSAVECLASSTABLE
        dont_save_class_table = 0
        if flags.fBooleanProperties:
            dont_save_class_table = (stream.unpack('<L', 4) >> 15) & 1
        # The rest of DataBlock and ExtraDataBlock is skipped when leaving
        # the 'with' block

    # FormStreamData: [MS-OFORMS] 2.2.10.5
    if flags.fMouseIcon:
        consume_GuidAndPicture(stream)
    if flags.fFont:
        consume_GuidAndFont(stream)
    if flags.fPicture:
        consume_GuidAndPicture(stream)

    # FormSiteData: [MS-OFORMS] 2.2.10.6
    if not dont_save_class_table:
        class_info_count = stream.unpack('<H', 2)
        for _ in range(class_info_count):
            consume_SiteClassInfo(stream)
    (site_count, _byte_count) = stream.unpacks('<LL', 8)
    # Each depth/type entry accounts for one or several sites
    pending = site_count
    with stream.will_pad():
        while pending > 0:
            pending -= consume_FormObjectDepthTypeCount(stream)
    for _ in range(site_count):
        yield consume_OleSiteConcreteControl(stream)
| 234 | + | |
def consume_MorphDataControl(stream):
    """
    Parse a MorphDataControl ([MS-OFORMS] 2.2.5.1) and return its value.

    The value is returned as the raw bytes read from the stream (empty when
    the fValue property is not set); everything else is skipped.
    """
    stream.check_values('MorphDataControl (versions)', '<BB', 2, (0, 2))
    block_size = stream.unpack('<H', 2)
    with stream.will_jump_to(block_size):
        flags = MorphDataPropMask(stream.unpack('<Q', 8))

        def skip_if_set(flag_names, width):
            # Skip 'width' bytes for each of the listed properties that is present
            for flag_name in flag_names:
                if flags[flag_name]:
                    stream.read(width)

        # MorphDataDataBlock: [MS-OFORMS] 2.2.5.3
        skip_if_set(('fVariousPropertyBits', 'fBackColor', 'fForeColor', 'fMaxLength'), 4)
        with stream.will_pad():
            skip_if_set(('fBorderStyle', 'fScrollBars', 'fDisplayStyle', 'fMousePointer'), 1)
        # PasswordChar, BoundColumn, TextColumn, ColumnCount, and ListRows are 2B + pad = 4B
        # ListWidth is 4B + pad = 4B
        skip_if_set(('fPasswordChar', 'fListWidth', 'fBoundColumn', 'fTextColumn',
                     'fColumnCount', 'fListRows'), 4)
        with stream.will_pad():
            if flags.fcColumnInfo:
                stream.read(2)
            skip_if_set(('fMatchEntry', 'fListStyle', 'fShowDropButtonWhen',
                         'fDropButtonStyle', 'fMultiSelect'), 1)
        value_size = consume_CountOfBytesWithCompressionFlag(stream) if flags.fValue else 0
        # Caption, PicturePosition, BorderColor, SpecialEffect, GroupName are 4B + pad = 4B
        # MouseIcon, Picture, Accelerator are 2B + pad = 4B
        skip_if_set(('fCaption', 'fPicturePosition', 'fBorderColor', 'fSpecialEffect',
                     'fMouseIcon', 'fPicture', 'fAccelerator', 'fGroupName'), 4)

        # MorphDataExtraDataBlock: [MS-OFORMS] 2.2.5.4
        # NOTE(review): these 8 bytes are skipped whatever the value of fSize;
        # presumably they are the Size property - confirm against the
        # specification that it is always present.
        stream.read(8)
        value = stream.read(value_size)

    # MorphDataStreamData: [MS-OFORMS] 2.2.5.5
    if flags.fMouseIcon:
        consume_GuidAndPicture(stream)
    if flags.fPicture:
        consume_GuidAndPicture(stream)
    consume_TextProps(stream)
    return value
| 282 | + | |
def extract_OleFormVariables(ole_file, stream_dir):
    """
    Extract the variables stored in a form storage.

    :param ole_file: object giving access to the streams (openstream method)
    :param stream_dir: path of the form storage, as a list of names
    :returns: list of the dicts produced by consume_FormControl from the 'f'
        stream, each completed with a 'value' entry read from the 'o' stream
    :raises OleFormParsingError: if a variable has a ClsidCacheIndex other than 23
    """
    f_stream = ExtendedStream.open(ole_file, '/'.join(stream_dir + ['f']))
    # The 'f' stream is parsed completely before the 'o' stream is opened
    variables = list(consume_FormControl(f_stream))
    o_stream = ExtendedStream.open(ole_file, '/'.join(stream_dir + ['o']))
    for variable in variables:
        clsid_cache_index = variable['ClsidCacheIndex']
        if clsid_cache_index != 23:
            raise OleFormParsingError('Unsupported stored type: {0}'.format(str(clsid_cache_index)))
        # Values are stored in the 'o' stream in the same order as the sites
        variable['value'] = consume_MorphDataControl(o_stream)
    return variables
oletools/olevba.py
| ... | ... | @@ -196,9 +196,10 @@ from __future__ import print_function |
| 196 | 196 | # 2017-05-31 c1fe: - PR #135 fixing issue #132 for some Mac files |
| 197 | 197 | # 2017-06-08 PL: - fixed issue #122 Chr() with negative numbers |
| 198 | 198 | # 2017-06-15 PL: - deobfuscation line by line to handle large files |
| 199 | -# 2017-07-11 v0.51.1 PL: - raise exception instead of sys.exit (issue #180) | |
| 199 | +# 2017-07-11 v0.52 PL: - raise exception instead of sys.exit (issue #180) | |
| 200 | +# 2017-11-08 VB: - PR #124 adding user form parsing (Vincent Brillault) | |
| 200 | 201 | |
| 201 | -__version__ = '0.51.1dev1' | |
| 202 | +__version__ = '0.52dev3' | |
| 202 | 203 | |
| 203 | 204 | #------------------------------------------------------------------------------ |
| 204 | 205 | # TODO: |
| ... | ... | @@ -265,6 +266,8 @@ except ImportError: |
| 265 | 266 | + "see http://codespeak.net/lxml " \ |
| 266 | 267 | + "or http://effbot.org/zone/element-index.htm") |
| 267 | 268 | |
| 269 | +from oleform import extract_OleFormVariables | |
| 270 | + | |
| 268 | 271 | # IMPORTANT: it should be possible to run oletools directly as scripts |
| 269 | 272 | # in any directory without installing them with pip or setup.py. |
| 270 | 273 | # In that case, relative imports are NOT usable. |
| ... | ... | @@ -1465,7 +1468,7 @@ def _extract_vba(ole, vba_root, project_path, dir_path, relaxed=False): |
| 1465 | 1468 | # So let's ignore it, otherwise it crashes on some files (issue #132) |
| 1466 | 1469 | # PR #135 by @c1fe: |
| 1467 | 1470 | # contrary to the specification I think that the unicode name |
| 1468 | - # is optional. if reference_reserved is not 0x003E I think it | |
| 1471 | + # is optional. if reference_reserved is not 0x003E I think it | |
| 1469 | 1472 | # is actually the start of another REFERENCE record |
| 1470 | 1473 | # at least when projectsyskind_syskind == 0x02 (Macintosh) |
| 1471 | 1474 | if reference_reserved == 0x003E: |
| ... | ... | @@ -2986,6 +2989,24 @@ class VBA_Parser(object): |
| 2986 | 2989 | log.debug('Printable string found in form: %r' % m.group()) |
| 2987 | 2990 | yield (self.filename, '/'.join(o_stream), m.group()) |
| 2988 | 2991 | |
| 2992 | + def extract_form_strings_extended(self): | |
| 2993 | + if self.ole_file is None: | |
| 2994 | + # This may be either an OpenXML/PPT or a text file: | |
| 2995 | + if self.type == TYPE_TEXT: | |
| 2996 | + # This is a text file, return no results: | |
| 2997 | + return | |
| 2998 | + else: | |
| 2999 | + # OpenXML/PPT: recursively yield results from each OLE subfile: | |
| 3000 | + for ole_subfile in self.ole_subfiles: | |
| 3001 | + for results in ole_subfile.extract_form_strings_extended(): | |
| 3002 | + yield results | |
| 3003 | + else: | |
| 3004 | + # This is an OLE file: | |
| 3005 | + self.find_vba_forms() | |
| 3006 | + ole = self.ole_file | |
| 3007 | + for form_storage in self.vba_forms: | |
| 3008 | + for variable in extract_OleFormVariables(ole, form_storage): | |
| 3009 | + yield (self.filename, '/'.join(form_storage), variable) | |
| 2989 | 3010 | |
| 2990 | 3011 | def close(self): |
| 2991 | 3012 | """ |
| ... | ... | @@ -3115,6 +3136,11 @@ class VBA_Parser_CLI(VBA_Parser): |
| 3115 | 3136 | print('VBA FORM STRING IN %r - OLE stream: %r' % (subfilename, stream_path)) |
| 3116 | 3137 | print('- ' * 39) |
| 3117 | 3138 | print(form_string) |
| 3139 | + for (subfilename, stream_path, form_variables) in self.extract_form_strings_extended(): | |
| 3140 | + print('-' * 79) | |
| 3141 | + print('VBA FORM Variable "%s" IN %r - OLE stream: %r' % (form_variables['name'], subfilename, stream_path)) | |
| 3142 | + print('- ' * 39) | |
| 3143 | + print(str(form_variables['value'])) | |
| 3118 | 3144 | if not vba_code_only: |
| 3119 | 3145 | # analyse the code from all modules at once: |
| 3120 | 3146 | self.print_analysis(show_decoded_strings, deobfuscate) | ... | ... |
setup.py
| ... | ... | @@ -42,7 +42,7 @@ import os, fnmatch |
| 42 | 42 | #--- METADATA ----------------------------------------------------------------- |
| 43 | 43 | |
| 44 | 44 | name = "oletools" |
| 45 | -version = '0.52dev2' | |
| 45 | +version = '0.52dev3' | |
| 46 | 46 | desc = "Python tools to analyze security characteristics of MS Office and OLE files (also called Structured Storage, Compound File Binary Format or Compound Document File Format), for Malware Analysis and Incident Response #DFIR" |
| 47 | 47 | long_desc = open('oletools/README.rst').read() |
| 48 | 48 | author = "Philippe Lagadec" | ... | ... |