Commit 7909381f67d8b4d5e43e26ce4bd5fc53678b24ee

Authored by decalage2
1 parent d0d4c87f

merged PR #124

oletools/oleform.py 0 → 100644
  1 +#!/usr/bin/env python
  2 +
  3 +import struct
  4 +
class OleFormParsingError(Exception):
    """Error raised when an OLE form stream cannot be parsed."""
  7 +
class Mask(object):
    """Decode an integer bit field into named single-bit flags.

    Subclasses must define two class attributes:

    * ``_size``: the number of bits to decode;
    * ``_names``: the flag names, where index 0 is the least-significant bit.

    A flag can be read as an attribute (``mask.fName``) or by key
    (``mask['fName']``); both return 0 or 1.
    """

    def __init__(self, val):
        # One entry per bit, least-significant bit first.
        self._val = [(val >> i) & 1 for i in range(self._size)]

    def __str__(self):
        """Return the comma-separated names of the flags that are set."""
        return ', '.join(self._names[i] for i in range(self._size) if self._val[i])

    def __getattr__(self, name):
        """Return the bit named *name*.

        Raises AttributeError for unknown names so that the usual attribute
        protocol (``hasattr``, ``getattr`` with a default) keeps working.
        """
        # Only called when normal lookup fails. Private names are never flags;
        # refusing them here also prevents infinite recursion when ``_val`` or
        # ``_names`` is not defined yet.
        if name.startswith('_'):
            raise AttributeError(name)
        try:
            return self._val[self._names.index(name)]
        except ValueError:
            raise AttributeError(name)

    def __len__(self):
        """Return the number of bits decoded by this mask."""
        return self._size

    def __getitem__(self, key):
        """Return the bit named *key* (ValueError if the name is unknown)."""
        return self._val[self._names.index(key)]
  23 +
class FormPropMask(Mask):
    """FormPropMask: [MS-OFORMS] 2.2.10.2

    Names are listed from the least-significant bit upwards. The two-bit
    Unused2 field is split into 'Unused2_0' and 'Unused2_1' so that every
    entry maps to exactly one bit.
    """
    _size = 28
    _names = ['Unused1', 'fBackColor', 'fForeColor', 'fNextAvailableID', 'Unused2_0', 'Unused2_1',
              # Bit 7 is fBorderStyle; it was previously a duplicate of fBooleanProperties.
              'fBooleanProperties', 'fBorderStyle', 'fMousePointer', 'fScrollBars',
              'fDisplayedSize', 'fLogicalSize', 'fScrollPosition', 'fGroupCnt', 'Reserved',
              'fMouseIcon', 'fCycle', 'fSpecialEffect', 'fBorderColor', 'fCaption', 'fFont',
              'fPicture', 'fZoom', 'fPictureAlignment', 'fPictureTiling', 'fPictureSizeMode',
              'fShapeCookie', 'fDrawBuffer']
  33 +
class SitePropMask(Mask):
    """SitePropMask: [MS-OFORMS] 2.2.10.12.2

    Flag names are listed from the least-significant bit upwards.
    """
    _size = 15
    _names = [
        'fName', 'fTag', 'fID', 'fHelpContextID', 'fBitFlags',
        'fObjectStreamSize', 'fTabIndex', 'fClsidCacheIndex', 'fPosition', 'fGroupID',
        'Unused1', 'fControlTipText', 'fRuntimeLicKey', 'fControlSource', 'fRowSource',
    ]
  40 +
class MorphDataPropMask(Mask):
    """MorphDataPropMask: [MS-OFORMS] 2.2.5.2

    Flag names are listed from the least-significant bit upwards.
    """
    _size = 33
    _names = [
        'fVariousPropertyBits', 'fBackColor', 'fForeColor', 'fMaxLength',
        'fBorderStyle', 'fScrollBars', 'fDisplayStyle', 'fMousePointer',
        'fSize', 'fPasswordChar', 'fListWidth', 'fBoundColumn',
        'fTextColumn', 'fColumnCount', 'fListRows', 'fcColumnInfo',
        'fMatchEntry', 'fListStyle', 'fShowDropButtonWhen', 'UnusedBits1',
        'fDropButtonStyle', 'fMultiSelect', 'fValue', 'fCaption',
        'fPicturePosition', 'fBorderColor', 'fSpecialEffect', 'fMouseIcon',
        'fPicture', 'fAccelerator', 'UnusedBits2', 'Reserved',
        'fGroupName',
    ]
  51 +
class ExtendedStream(object):
    """Wrap a readable stream, tracking the read offset and allowing scoped skips.

    The instance is used as a context manager after calling ``will_jump_to``
    or ``will_pad``::

        with stream.will_jump_to(size):   # on exit, skip up to `size` bytes
            ...                           # counted from the start of the block
        with stream.will_pad(4):          # on exit, skip up to the next multiple
            ...                           # of 4 bytes counted from the block start

    Blocks may be nested; pending skips are kept on a stack.
    """

    def __init__(self, stream, path):
        self._pos = 0           # number of bytes requested from the stream so far
        self._jumps = []        # stack of (start offset, is_jump, size) for open blocks
        self._next_jump = None  # (is_jump, size) armed by will_jump_to()/will_pad()
        self._stream = stream
        self._path = path       # only used to prefix error messages

    @classmethod
    def open(cls, ole_file, path):
        """Open *path* with ``ole_file.openstream`` and wrap the result."""
        return cls(ole_file.openstream(path), path)

    def read(self, size):
        """Read *size* bytes from the underlying stream and advance the offset."""
        self._pos += size
        return self._stream.read(size)

    def will_jump_to(self, size):
        """Arm the next ``with`` block to end exactly *size* bytes after its start."""
        self._next_jump = (True, size)
        return self

    def will_pad(self, pad=4):
        """Arm the next ``with`` block to end on a multiple of *pad* bytes from its start."""
        self._next_jump = (False, pad)
        return self

    def __enter__(self):
        if self._next_jump is None:
            raise RuntimeError('will_jump_to() or will_pad() must be called before '
                               'using the stream as a context manager')
        (jump_type, size) = self._next_jump
        self._jumps.append((self._pos, jump_type, size))
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # Always pop so the stack stays balanced, even when the block failed.
        (start, jump_type, size) = self._jumps.pop()
        if exc_type is not None:
            # Let the exception propagate without touching the stream.
            return
        consumed = self._pos - start
        if jump_type:
            remaining = size - consumed
            if remaining < 0:
                # A negative size must never reach read(): file-like objects
                # treat it as "read everything", which would corrupt the offset.
                self.raise_error('Read {0} bytes past the end of a {1}-byte block'.format(
                    -remaining, size))
            self.read(remaining)
        else:
            align = consumed % size
            if align:
                self.read(size - align)

    def unpacks(self, format, size):
        """Read *size* bytes and return the tuple produced by ``struct.unpack``."""
        return struct.unpack(format, self.read(size))

    def unpack(self, format, size):
        """Read *size* bytes and return the first unpacked value."""
        return self.unpacks(format, size)[0]

    def raise_error(self, reason, back=0):
        """Raise OleFormParsingError as ``<path>:<offset>: <reason>``.

        *back* is subtracted from the current offset so the message can point
        at the start of the offending field rather than at its end.
        """
        raise OleFormParsingError('{0}:{1}: {2}'.format(self._path, self._pos - back, reason))

    def check_values(self, name, format, size, expected):
        """Unpack *size* bytes and raise a parsing error unless they equal the tuple *expected*."""
        value = self.unpacks(format, size)
        if value != expected:
            self.raise_error('Invalid {0}: expected {1} got {2}'.format(name, str(expected), str(value)))

    def check_value(self, name, format, size, expected):
        """Single-value variant of ``check_values``."""
        self.check_values(name, format, size, (expected,))
  106 +
  107 +
def consume_TextProps(stream):
    """Skip over a TextProps structure ([MS-OFORMS] 2.3.1).

    Checks the two version bytes (0, 2), then reads the 16-bit size and skips
    that many bytes.
    """
    stream.check_values('TextProps (versions)', '<BB', 2, (0, 2))
    props_size = stream.unpack('<H', 2)
    stream.read(props_size)
  113 +
def consume_GuidAndFont(stream):
    """Skip over a GuidAndFont structure ([MS-OFORMS] 2.4.7).

    The 16-byte GUID selects the font encoding that follows: a StdFont
    ([MS-OFORMS] 2.4.12) or a TextProps ([MS-OFORMS] 2.3.1). Any other GUID is
    reported through ``stream.raise_error``.
    """
    # The first three GUID fields are little-endian; the last 8 bytes are
    # compared as one big-endian integer.
    uuid = stream.unpacks('<LHH', 8) + stream.unpacks('>Q', 8)
    if uuid == (199447043, 36753, 4558, 11376937813817407569):
        # UUID == {0BE35203-8F91-11CE-9DE3-00AA004BB851}
        # StdFont: [MS-OFORMS] 2.4.12
        stream.check_value('StdFont (version)', '<B', 1, 1)
        # Skip sCharset, bFlags, sWeight, ulHeight
        stream.read(9)
        face_len = stream.unpack('<B', 1)
        stream.read(face_len)
    elif uuid == (2948729120, 55886, 4558, 13349514450607572916):
        # UUID == {AFC20920-DA4E-11CE-B943-00AA006887B4}
        consume_TextProps(stream)
    else:
        # Report the offset of the GUID itself, 16 bytes back.
        stream.raise_error('Invalid GuidAndFont (UUID)', 16)
  130 +
def consume_GuidAndPicture(stream):
    """Skip over a GuidAndPicture structure ([MS-OFORMS] 2.4.8).

    Verifies the StdPicture GUID and preamble, then reads the 32-bit picture
    size and skips that many bytes. Mismatches are reported through the
    stream's ``check_value``/``check_values`` helpers.
    """
    # UUID == {0BE35204-8F91-11CE-9DE3-00AA004BB851}
    # The first three fields are little-endian; the last 8 bytes are compared
    # as one big-endian integer.
    stream.check_values('GuidAndPicture (UUID part 1)', '<LHH', 8, (199447044, 36753, 4558))
    stream.check_value('GuidAndPicture (UUID part 2)', '>Q', 8, 11376937813817407569)
    # StdPicture: [MS-OFORMS] 2.4.13
    stream.check_value('StdPicture (Preamble)', '<L', 4, 0x0000746C)
    size = stream.unpack('<L', 4)
    stream.read(size)
  140 +
def consume_CountOfBytesWithCompressionFlag(stream):
    """Read a 32-bit count whose top bit is a compression flag.

    CountOfBytesWithCompressionFlag or CountOfCharsWithCompressionFlag:
    [MS-OFORMS] 2.4.14.2 or 2.4.14.3.

    Returns the low 31 bits of the value. A non-zero count without the
    compression flag is reported through ``stream.raise_error``.
    """
    count = stream.unpack('<L', 4)
    if not count & 0x80000000 and count != 0:
        # Point the error at the start of the 4-byte field just read.
        stream.raise_error('Uncompress string length', 4)
    return count & 0x7FFFFFFF
  147 +
def consume_SiteClassInfo(stream):
    """Skip over a SiteClassInfo structure ([MS-OFORMS] 2.2.10.10.1).

    Checks that the 16-bit version is 0, then reads the 16-bit size and skips
    that many bytes.
    """
    stream.check_value('SiteClassInfo (version)', '<H', 2, 0)
    table_size = stream.unpack('<H', 2)
    stream.read(table_size)
  153 +
def consume_FormObjectDepthTypeCount(stream):
    """Read one FormObjectDepthTypeCount entry ([MS-OFORMS] 2.2.10.7).

    Returns the number of sites the entry stands for. When the high bit of the
    second byte is set, the remaining 7 bits are that number and one more byte
    follows, which must be 1. Otherwise the second byte itself must be 1 and
    the entry stands for a single site.
    """
    depth, type_or_count = stream.unpacks('<BB', 2)
    if not type_or_count & 0x80:
        if type_or_count != 1:
            stream.raise_error('Invalid FormObjectDepthTypeCount (SITE_TYPE): expected 1 got {0}'.format(str(type_or_count)))
        return 1
    stream.check_value('FormObjectDepthTypeCount (SITE_TYPE)', '<B', 1, 1)
    # The high bit is known to be set, so XOR clears it.
    return type_or_count ^ 0x80
  163 +
def consume_OleSiteConcreteControl(stream):
    """Parse one OleSiteConcreteControl ([MS-OFORMS] 2.2.10.12.1).

    Returns a dict with the keys 'name', 'tag', 'id', 'tabindex' and
    'ClsidCacheIndex'. Properties absent from the property mask are reported
    as 0 (or as an empty read for 'name' and 'tag'). Whatever is left of the
    structure after the tag is skipped using the declared size.
    """
    stream.check_value('OleSiteConcreteControl (version)', '<H', 2, 0)
    site_size = stream.unpack('<H', 2)
    with stream.will_jump_to(site_size):
        flags = SitePropMask(stream.unpack('<L', 4))

        # SiteDataBlock: [MS-OFORMS] 2.2.10.12.3
        name_len = consume_CountOfBytesWithCompressionFlag(stream) if flags.fName else 0
        tag_len = consume_CountOfBytesWithCompressionFlag(stream) if flags.fTag else 0
        site_id = stream.unpack('<L', 4) if flags.fID else 0
        for skipped in ('fHelpContextID', 'fBitFlags', 'fObjectStreamSize'):
            if flags[skipped]:
                stream.read(4)
        # Up to three 16-bit fields, padded as a group to a 4-byte boundary.
        with stream.will_pad():
            tab_index = stream.unpack('<H', 2) if flags.fTabIndex else 0
            clsid_cache_index = stream.unpack('<H', 2) if flags.fClsidCacheIndex else 0
            if flags.fGroupID:
                stream.read(2)
        # For the next 4 entries, the documentation adds padding, but it should already be aligned??
        for skipped in ('fControlTipText', 'fRuntimeLicKey', 'fControlSource', 'fRowSource'):
            if flags[skipped]:
                stream.read(4)

        # SiteExtraDataBlock: [MS-OFORMS] 2.2.10.12.4
        name = stream.read(name_len)
        tag = stream.read(tag_len)
        return {
            'name': name,
            'tag': tag,
            'id': site_id,
            'tabindex': tab_index,
            'ClsidCacheIndex': clsid_cache_index,
        }
  198 +
def consume_FormControl(stream):
    """Parse a FormControl ([MS-OFORMS] 2.2.10.1) and yield its sites.

    This is a generator: nothing is read until it is iterated. It yields one
    dict per site, as returned by ``consume_OleSiteConcreteControl``.
    """
    stream.check_values('FormControl (versions)', '<BB', 2, (0, 4))
    form_size = stream.unpack('<H', 2)
    with stream.will_jump_to(form_size):
        flags = FormPropMask(stream.unpack('<L', 4))
        # FormDataBlock: [MS-OFORMS] 2.2.10.3
        for skipped in ('fBackColor', 'fForeColor', 'fNextAvailableID'):
            if flags[skipped]:
                stream.read(4)
        # Bit 15 of BooleanProperties is FORM_FLAG_DONTSAVECLASSTABLE.
        dont_save_class_table = 0
        if flags.fBooleanProperties:
            boolean_properties = stream.unpack('<L', 4)
            dont_save_class_table = (boolean_properties & (1 << 15)) >> 15
        # The rest of DataBlock and ExtraDataBlock is skipped when the block exits.

    # FormStreamData: [MS-OFORMS] 2.2.10.5
    if flags.fMouseIcon:
        consume_GuidAndPicture(stream)
    if flags.fFont:
        consume_GuidAndFont(stream)
    if flags.fPicture:
        consume_GuidAndPicture(stream)

    # FormSiteData: [MS-OFORMS] 2.2.10.6
    if not dont_save_class_table:
        class_info_count = stream.unpack('<H', 2)
        for _ in range(class_info_count):
            consume_SiteClassInfo(stream)
    (site_count, _byte_count) = stream.unpacks('<LL', 8)
    # One depth/type entry can stand for several sites, so count down by the
    # number of sites each entry reports.
    pending = site_count
    with stream.will_pad():
        while pending > 0:
            pending -= consume_FormObjectDepthTypeCount(stream)
    for _ in range(site_count):
        yield consume_OleSiteConcreteControl(stream)
  234 +
def consume_MorphDataControl(stream):
    """Parse a MorphDataControl ([MS-OFORMS] 2.2.5.1) and return its value.

    Only the value is extracted; every other property is skipped. Returns the
    bytes read for the value, which is an empty read when the fValue flag is
    not set.
    """
    def skip_flagged(names, width):
        # Skip `width` bytes for each property in `names` whose flag is set.
        for flag_name in names:
            if flags[flag_name]:
                stream.read(width)

    stream.check_values('MorphDataControl (versions)', '<BB', 2, (0, 2))
    block_size = stream.unpack('<H', 2)
    with stream.will_jump_to(block_size):
        flags = MorphDataPropMask(stream.unpack('<Q', 8))

        # MorphDataDataBlock: [MS-OFORMS] 2.2.5.3
        skip_flagged(('fVariousPropertyBits', 'fBackColor', 'fForeColor', 'fMaxLength'), 4)
        with stream.will_pad():
            skip_flagged(('fBorderStyle', 'fScrollBars', 'fDisplayStyle', 'fMousePointer'), 1)
        # PasswordChar, BoundColumn, TextColumn, ColumnCount, and ListRows are 2B + pad = 4B
        # ListWidth is 4B + pad = 4B
        skip_flagged(('fPasswordChar', 'fListWidth', 'fBoundColumn', 'fTextColumn',
                      'fColumnCount', 'fListRows'), 4)
        with stream.will_pad():
            if flags.fcColumnInfo:
                stream.read(2)
            skip_flagged(('fMatchEntry', 'fListStyle', 'fShowDropButtonWhen',
                          'fDropButtonStyle', 'fMultiSelect'), 1)
        value_size = consume_CountOfBytesWithCompressionFlag(stream) if flags.fValue else 0
        # Caption, PicturePosition, BorderColor, SpecialEffect, GroupName are 4B + pad = 4B
        # MouseIcon, Picture, Accelerator are 2B + pad = 4B
        skip_flagged(('fCaption', 'fPicturePosition', 'fBorderColor', 'fSpecialEffect',
                      'fMouseIcon', 'fPicture', 'fAccelerator', 'fGroupName'), 4)

        # MorphDataExtraDataBlock: [MS-OFORMS] 2.2.5.4
        stream.read(8)
        value = stream.read(value_size)

    # MorphDataStreamData: [MS-OFORMS] 2.2.5.5
    if flags.fMouseIcon:
        consume_GuidAndPicture(stream)
    if flags.fPicture:
        consume_GuidAndPicture(stream)
    consume_TextProps(stream)
    return value
  282 +
def extract_OleFormVariables(ole_file, stream_dir):
    """Return the variables of the form stored under *stream_dir*.

    The site list is parsed from the 'f' stream of the storage, then one value
    per site is read, in order, from the 'o' stream and stored under the
    'value' key of the site's dict.

    Raises OleFormParsingError if a site's ClsidCacheIndex is not 23, the only
    stored type handled here.
    """
    form_stream = ExtendedStream.open(ole_file, '/'.join(stream_dir + ['f']))
    variables = list(consume_FormControl(form_stream))
    object_stream = ExtendedStream.open(ole_file, '/'.join(stream_dir + ['o']))
    for variable in variables:
        cache_index = variable['ClsidCacheIndex']
        if cache_index != 23:
            raise OleFormParsingError('Unsupported stored type: {0}'.format(str(cache_index)))
        variable['value'] = consume_MorphDataControl(object_stream)
    return variables
... ...
oletools/olevba.py
... ... @@ -196,9 +196,10 @@ from __future__ import print_function
196 196 # 2017-05-31 c1fe: - PR #135 fixing issue #132 for some Mac files
197 197 # 2017-06-08 PL: - fixed issue #122 Chr() with negative numbers
198 198 # 2017-06-15 PL: - deobfuscation line by line to handle large files
199   -# 2017-07-11 v0.51.1 PL: - raise exception instead of sys.exit (issue #180)
  199 +# 2017-07-11 v0.52 PL: - raise exception instead of sys.exit (issue #180)
  200 +# 2017-11-08 VB: - PR #124 adding user form parsing (Vincent Brillault)
200 201  
201   -__version__ = '0.51.1dev1'
  202 +__version__ = '0.52dev3'
202 203  
203 204 #------------------------------------------------------------------------------
204 205 # TODO:
... ... @@ -265,6 +266,8 @@ except ImportError:
265 266 + "see http://codespeak.net/lxml " \
266 267 + "or http://effbot.org/zone/element-index.htm")
267 268  
  269 +from oleform import extract_OleFormVariables
  270 +
268 271 # IMPORTANT: it should be possible to run oletools directly as scripts
269 272 # in any directory without installing them with pip or setup.py.
270 273 # In that case, relative imports are NOT usable.
... ... @@ -1465,7 +1468,7 @@ def _extract_vba(ole, vba_root, project_path, dir_path, relaxed=False):
1465 1468 # So let's ignore it, otherwise it crashes on some files (issue #132)
1466 1469 # PR #135 by @c1fe:
1467 1470 # contrary to the specification I think that the unicode name
1468   - # is optional. if reference_reserved is not 0x003E I think it
  1471 + # is optional. if reference_reserved is not 0x003E I think it
1469 1472 # is actually the start of another REFERENCE record
1470 1473 # at least when projectsyskind_syskind == 0x02 (Macintosh)
1471 1474 if reference_reserved == 0x003E:
... ... @@ -2986,6 +2989,24 @@ class VBA_Parser(object):
2986 2989 log.debug('Printable string found in form: %r' % m.group())
2987 2990 yield (self.filename, '/'.join(o_stream), m.group())
2988 2991  
  2992 + def extract_form_strings_extended(self):
  2993 + if self.ole_file is None:
  2994 + # This may be either an OpenXML/PPT or a text file:
  2995 + if self.type == TYPE_TEXT:
  2996 + # This is a text file, return no results:
  2997 + return
  2998 + else:
  2999 + # OpenXML/PPT: recursively yield results from each OLE subfile:
  3000 + for ole_subfile in self.ole_subfiles:
  3001 + for results in ole_subfile.extract_form_strings_extended():
  3002 + yield results
  3003 + else:
  3004 + # This is an OLE file:
  3005 + self.find_vba_forms()
  3006 + ole = self.ole_file
  3007 + for form_storage in self.vba_forms:
  3008 + for variable in extract_OleFormVariables(ole, form_storage):
  3009 + yield (self.filename, '/'.join(form_storage), variable)
2989 3010  
2990 3011 def close(self):
2991 3012 """
... ... @@ -3115,6 +3136,11 @@ class VBA_Parser_CLI(VBA_Parser):
3115 3136 print('VBA FORM STRING IN %r - OLE stream: %r' % (subfilename, stream_path))
3116 3137 print('- ' * 39)
3117 3138 print(form_string)
  3139 + for (subfilename, stream_path, form_variables) in self.extract_form_strings_extended():
  3140 + print('-' * 79)
  3141 + print('VBA FORM Variable "%s" IN %r - OLE stream: %r' % (form_variables['name'], subfilename, stream_path))
  3142 + print('- ' * 39)
  3143 + print(str(form_variables['value']))
3118 3144 if not vba_code_only:
3119 3145 # analyse the code from all modules at once:
3120 3146 self.print_analysis(show_decoded_strings, deobfuscate)
... ...
setup.py
... ... @@ -42,7 +42,7 @@ import os, fnmatch
42 42 #--- METADATA -----------------------------------------------------------------
43 43  
44 44 name = "oletools"
45   -version = '0.52dev2'
  45 +version = '0.52dev3'
46 46 desc = "Python tools to analyze security characteristics of MS Office and OLE files (also called Structured Storage, Compound File Binary Format or Compound Document File Format), for Malware Analysis and Incident Response #DFIR"
47 47 long_desc = open('oletools/README.rst').read()
48 48 author = "Philippe Lagadec"
... ...