Commit 82bb3107927ea4f4356789843698cba652713dd6

Authored by Vincent Brillault
1 parent c5d4ec7b

WIP: Separate object (ExtendedStream) from logic (functions)

oletools/oleform.py
... ... @@ -49,33 +49,43 @@ class MorphDataPropMask(Mask):
49 49 'fBorderColor', 'fSpecialEffect', 'fMouseIcon', 'fPicture', 'fAccelerator',
50 50 'UnusedBits2', 'Reserved', 'fGroupName']
51 51  
52   -class OleUserFormParser(object):
53   - def __init__(self, control_stream, data_stream):
54   - self.variables = []
55   - self.set_stream(control_stream)
56   - self.consume_FormControl()
57   - self.set_stream(data_stream)
58   - self.consume_stored_data()
59   -
60   - def set_stream(self, stream):
  52 +class ExtendedStream(object):
  53 + def __init__(self, stream, path):
61 54 self._pos = 0
62   - self._frozen_pos = []
  55 + self._jumps = []
63 56 self._stream = stream
  57 + self._path = path
  58 +
  59 + @classmethod
  60 + def open(cls, ole_file, path):
  61 + stream = ole_file.openstream(path)
  62 + return cls(stream, path)
64 63  
65 64 def read(self, size):
66 65 self._pos += size
67 66 return self._stream.read(size)
68 67  
69   - def freeze(self):
70   - self._frozen_pos.append(self._pos)
  68 + def will_jump_to(self, size):
  69 + self._next_jump = (True, size)
  70 + return self
  71 +
  72 + def will_pad(self, pad=4):
  73 + self._next_jump = (False, pad)
  74 + return self
71 75  
72   - def unfreeze(self, size):
73   - self.read(self._frozen_pos.pop() - self._pos + size)
  76 + def __enter__(self):
  77 + (jump_type, size) = self._next_jump
  78 + self._jumps.append((self._pos, jump_type, size))
74 79  
75   - def unfreeze_pad(self):
76   - align_pos = (self._pos - self._frozen_pos.pop()) % 4
77   - if align_pos:
78   - self.read(4 - align_pos)
  80 + def __exit__(self, exc_type, exc_value, traceback):
  81 + if exc_type is None:
  82 + (start, jump_type, size) = self._jumps.pop()
  83 + if jump_type:
  84 + self.read(size - (self._pos - start))
  85 + else:
  86 + align = (self._pos - start) % size
  87 + if align:
  88 + self.read(size - align)
79 89  
80 90 def unpacks(self, format, size):
81 91 return struct.unpack(format, self.read(size))
... ... @@ -83,177 +93,174 @@ class OleUserFormParser(object):
83 93 def unpack(self, format, size):
84 94 return self.unpacks(format, size)[0]
85 95  
  96 + def raise_error(self, reason, back=0):
  97 + raise OleFormParsingError('{0}:{1}: {2}'.format(self._path, self._pos - back, reason))
  98 +
86 99 def check_values(self, name, format, size, expected):
87 100 value = self.unpacks(format, size)
88 101 if value != expected:
89   - raise OleFormParsingError('Invalid {0} at {1}: expected {2} got {3}'.format(name, self._pos - size, str(expected), str(value)))
  102 + self.raise_error('Invalid {0}: expected {1} got {2}'.format(name, str(expected), str(value)), size)
90 103  
91 104 def check_value(self, name, format, size, expected):
92 105 self.check_values(name, format, size, (expected,))
93 106  
94   - def consume_TextProps(self):
95   - # TextProps: [MS-OFORMS] 2.3.1
96   - self.check_values('TextProps (versions)', '<BB', 2, (0, 2))
97   - cbTextProps = self.unpack('<H', 2)
98   - self.read(cbTextProps)
99   -
100   - def consume_GuidAndFont(self):
101   - # GuidAndFont: [MS-OFORMS] 2.4.7
102   - UUIDS = self.unpacks('<LHH', 8) + self.unpacks('>Q', 8)
103   - if UUIDS == (199447043, 36753, 4558, 11376937813817407569L):
104   - # UUID == {0BE35203-8F91-11CE-9DE300AA004BB851}
105   - # StdFont: [MS-OFORMS] 2.4.12
106   - self.check_value('StdFont (version)', '<B', 1, 1)
107   - # Skip sCharset, bFlags, sWeight, ulHeight
108   - self.read(9)
109   - bFaceLen = self.unpack('<B', 1)
110   - self.read(bFaceLen)
111   - elif UUIDs == (2948729120, 55886, 4558, 13349514450607572916L):
112   - # UUID == {AFC20920-DA4E-11CE-B94300AA006887B4}
113   - self.consume_TextProps()
114   - else:
115   - raise OleFormParsingError('Invalid GuidAndFont at {0}: UUID'.format(self._pos - 16))
116   -
117   - def consume_GuidAndPicture(self):
118   - # GuidAndPicture: [MS-OFORMS] 2.4.8
119   - # UUID == {0BE35204-8F91-11CE-9DE3-00AA004BB851}
120   - self.check_values('GuidAndPicture (UUID part 1)', '<LHH', 8, (199447044, 36753, 4558))
121   - self.check_value('GuidAndPicture (UUID part 1)', '>Q', 8, 11376937813817407569L)
122   - # StdPicture: [MS-OFORMS] 2.4.13
123   - self.check_value('StdPicture (Preamble)', '<L', 4, 0x0000746C)
124   - size = self.unpack('<L', 4)
125   - self.read(size)
126   -
127   - def consume_CountOfBytesWithCompressionFlag(self):
128   - # CountOfBytesWithCompressionFlag or CountOfCharsWithCompressionFlag: [MS-OFORMS] 2.4.14.2 or 2.4.14.3
129   - count = self.unpack('<L', 4)
130   - if not count & 0x80000000 and count != 0:
131   - raise OleFormParsingError('Uncompress string length at {0}', self._pos - 4)
132   - return count & 0x7FFFFFFF
133   -
134   - def consume_SiteClassInfo(self):
135   - # SiteClassInfo: [MS-OFORMS] 2.2.10.10.1
136   - self.check_value('SiteClassInfo (version)', '<H', 2, 0)
137   - cbClassTable = self.unpack('<H', 2)
138   - self.read(cbClassTable)
139   -
140   - def consume_FormObjectDepthTypeCount(self):
141   - # FormObjectDepthTypeCount: [MS-OFORMS] 2.2.10.7
142   - (depth, mixed) = self.unpacks('<BB', 2)
143   - if mixed & 0x80:
144   - self.check_value('FormObjectDepthTypeCount (SITE_TYPE)', '<B', 1, 1)
145   - return mixed ^ 0x80
146   - if mixed != 1:
147   - raise OleFormParsingError('Invalid FormObjectDepthTypeCount (SITE_TYPE) at {0}: expected 1 got {3}'.format(self._pos - 2, str(mixed)))
148   - return 1
149   -
150   - def consume_OleSiteConcreteControl(self):
151   - # OleSiteConcreteControl: [MS-OFORMS] 2.2.10.12.1
152   - self.check_value('OleSiteConcreteControl (version)', '<H', 2, 0)
153   - cbSite = self.unpack('<H', 2)
154   - self.freeze()
155   - propmask = SitePropMask(self.unpack('<L', 4))
  107 +
  108 +def consume_TextProps(stream):
  109 + # TextProps: [MS-OFORMS] 2.3.1
  110 + stream.check_values('TextProps (versions)', '<BB', 2, (0, 2))
  111 + cbTextProps = stream.unpack('<H', 2)
  112 + stream.read(cbTextProps)
  113 +
  114 +def consume_GuidAndFont(stream):
  115 + # GuidAndFont: [MS-OFORMS] 2.4.7
  116 + UUIDS = stream.unpacks('<LHH', 8) + stream.unpacks('>Q', 8)
  117 + if UUIDS == (199447043, 36753, 4558, 11376937813817407569L):
  118 + # UUID == {0BE35203-8F91-11CE-9DE300AA004BB851}
  119 + # StdFont: [MS-OFORMS] 2.4.12
  120 + stream.check_value('StdFont (version)', '<B', 1, 1)
  121 + # Skip sCharset, bFlags, sWeight, ulHeight
  122 + stream.read(9)
  123 + bFaceLen = stream.unpack('<B', 1)
  124 + stream.read(bFaceLen)
  125 + elif UUIDS == (2948729120, 55886, 4558, 13349514450607572916L):
  126 + # UUID == {AFC20920-DA4E-11CE-B94300AA006887B4}
  127 + consume_TextProps(stream)
  128 + else:
  129 + stream.raise_error('Invalid GuidAndFont (UUID)', 16)
  130 +
  131 +def consume_GuidAndPicture(stream):
  132 + # GuidAndPicture: [MS-OFORMS] 2.4.8
  133 + # UUID == {0BE35204-8F91-11CE-9DE3-00AA004BB851}
  134 + stream.check_values('GuidAndPicture (UUID part 1)', '<LHH', 8, (199447044, 36753, 4558))
  135 + stream.check_value('GuidAndPicture (UUID part 2)', '>Q', 8, 11376937813817407569L)
  136 + # StdPicture: [MS-OFORMS] 2.4.13
  137 + stream.check_value('StdPicture (Preamble)', '<L', 4, 0x0000746C)
  138 + size = stream.unpack('<L', 4)
  139 + stream.read(size)
  140 +
  141 +def consume_CountOfBytesWithCompressionFlag(stream):
  142 + # CountOfBytesWithCompressionFlag or CountOfCharsWithCompressionFlag: [MS-OFORMS] 2.4.14.2 or 2.4.14.3
  143 + count = stream.unpack('<L', 4)
  144 + if not count & 0x80000000 and count != 0:
  145 + stream.raise_error('Uncompress string length', 4)
  146 + return count & 0x7FFFFFFF
  147 +
  148 +def consume_SiteClassInfo(stream):
  149 + # SiteClassInfo: [MS-OFORMS] 2.2.10.10.1
  150 + stream.check_value('SiteClassInfo (version)', '<H', 2, 0)
  151 + cbClassTable = stream.unpack('<H', 2)
  152 + stream.read(cbClassTable)
  153 +
  154 +def consume_FormObjectDepthTypeCount(stream):
  155 + # FormObjectDepthTypeCount: [MS-OFORMS] 2.2.10.7
  156 + (depth, mixed) = stream.unpacks('<BB', 2)
  157 + if mixed & 0x80:
  158 + stream.check_value('FormObjectDepthTypeCount (SITE_TYPE)', '<B', 1, 1)
  159 + return mixed ^ 0x80
  160 + if mixed != 1:
  161 + stream.raise_error('Invalid FormObjectDepthTypeCount (SITE_TYPE): expected 1 got {0}'.format(str(mixed)), 2)
  162 + return 1
  163 +
  164 +def consume_OleSiteConcreteControl(stream):
  165 + # OleSiteConcreteControl: [MS-OFORMS] 2.2.10.12.1
  166 + stream.check_value('OleSiteConcreteControl (version)', '<H', 2, 0)
  167 + cbSite = stream.unpack('<H', 2)
  168 + with stream.will_jump_to(cbSite):
  169 + propmask = SitePropMask(stream.unpack('<L', 4))
156 170 # SiteDataBlock: [MS-OFORMS] 2.2.10.12.3
157 171 name_len = tag_len = id = 0
158 172 if propmask.fName:
159   - name_len = self.consume_CountOfBytesWithCompressionFlag()
  173 + name_len = consume_CountOfBytesWithCompressionFlag(stream)
160 174 if propmask.fTag:
161   - tag_len = self.consume_CountOfBytesWithCompressionFlag()
  175 + tag_len = consume_CountOfBytesWithCompressionFlag(stream)
162 176 if propmask.fID:
163   - id = self.unpack('<L', 4)
  177 + id = stream.unpack('<L', 4)
164 178 for prop in ['fHelpContextID', 'fBitFlags', 'fObjectStreamSize']:
165 179 if propmask[prop]:
166   - self.read(4)
  180 + stream.read(4)
167 181 tabindex = ClsidCacheIndex = 0
168   - self.freeze()
169   - if propmask.fTabIndex:
170   - tabindex = self.unpack('<H', 2)
171   - if propmask.fClsidCacheIndex:
172   - ClsidCacheIndex = self.unpack('<H', 2)
173   - if propmask.fGroupID:
174   - self.read(2)
175   - self.unfreeze_pad()
  182 + with stream.will_pad():
  183 + if propmask.fTabIndex:
  184 + tabindex = stream.unpack('<H', 2)
  185 + if propmask.fClsidCacheIndex:
  186 + ClsidCacheIndex = stream.unpack('<H', 2)
  187 + if propmask.fGroupID:
  188 + stream.read(2)
176 189 # For the next 4 entries, the documentation adds padding, but it should already be aligned??
177 190 for prop in ['fControlTipText', 'fRuntimeLicKey', 'fControlSource', 'fRowSource']:
178 191 if propmask[prop]:
179   - self.read(4)
  192 + stream.read(4)
180 193 # SiteExtraDataBlock: [MS-OFORMS] 2.2.10.12.4
181   - name = self.read(name_len)
182   - tag = self.read(tag_len)
183   - self.variables.append({'name': name, 'tag': tag, 'id': id,
184   - 'tabindex': tabindex,
185   - 'ClsidCacheIndex': ClsidCacheIndex})
186   - self.unfreeze(cbSite)
187   -
188   - def consume_FormControl(self):
189   - # FormControl: [MS-OFORMS] 2.2.10.1
190   - self.check_values('FormControl (versions)', '<BB', 2, (0, 4))
191   - cbform = self.unpack('<H', 2)
192   - self.freeze()
193   - propmask = FormPropMask(self.unpack('<L', 4))
  194 + name = stream.read(name_len)
  195 + tag = stream.read(tag_len)
  196 + return {'name': name, 'tag': tag, 'id': id, 'tabindex': tabindex,
  197 + 'ClsidCacheIndex': ClsidCacheIndex}
  198 +
  199 +def consume_FormControl(stream):
  200 + # FormControl: [MS-OFORMS] 2.2.10.1
  201 + stream.check_values('FormControl (versions)', '<BB', 2, (0, 4))
  202 + cbform = stream.unpack('<H', 2)
  203 + with stream.will_jump_to(cbform):
  204 + propmask = FormPropMask(stream.unpack('<L', 4))
194 205 # FormDataBlock: [MS-OFORMS] 2.2.10.3
195 206 for prop in ['fBackColor', 'fForeColor', 'fNextAvailableID']:
196 207 if propmask[prop]:
197   - self.read(4)
  208 + stream.read(4)
198 209 if propmask.fBooleanProperties:
199   - BooleanProperties = self.unpack('<L', 4)
  210 + BooleanProperties = stream.unpack('<L', 4)
200 211 FORM_FLAG_DONTSAVECLASSTABLE = (BooleanProperties & (1<<15)) >> 15
201 212 else:
202 213 FORM_FLAG_DONTSAVECLASSTABLE = 0
203 214 # Skip the rest of DataBlock and ExtraDataBlock
204   - self.unfreeze(cbform)
205   - # FormStreamData: [MS-OFORMS] 2.2.10.5
206   - if propmask.fMouseIcon:
207   - self.consume_GuidAndPicture()
208   - if propmask.fFont:
209   - self.consume_GuidAndFont()
210   - if propmask.fPicture:
211   - self.consume_GuidAndPicture()
212   - # FormSiteData: [MS-OFORMS] 2.2.10.6
213   - if not FORM_FLAG_DONTSAVECLASSTABLE:
214   - CountOfSiteClassInfo = self.unpack('<H', 2)
215   - for i in range(CountOfSiteClassInfo):
216   - self.consume_SiteClassInfo()
217   - (CountOfSites, CountOfBytes) = self.unpacks('<LL', 8)
218   - remaining_SiteDepthsAndTypes = CountOfSites
219   - self.freeze()
  215 + # FormStreamData: [MS-OFORMS] 2.2.10.5
  216 + if propmask.fMouseIcon:
  217 + consume_GuidAndPicture(stream)
  218 + if propmask.fFont:
  219 + consume_GuidAndFont(stream)
  220 + if propmask.fPicture:
  221 + consume_GuidAndPicture(stream)
  222 + # FormSiteData: [MS-OFORMS] 2.2.10.6
  223 + if not FORM_FLAG_DONTSAVECLASSTABLE:
  224 + CountOfSiteClassInfo = stream.unpack('<H', 2)
  225 + for i in range(CountOfSiteClassInfo):
  226 + consume_SiteClassInfo(stream)
  227 + (CountOfSites, CountOfBytes) = stream.unpacks('<LL', 8)
  228 + remaining_SiteDepthsAndTypes = CountOfSites
  229 + with stream.will_pad():
220 230 while remaining_SiteDepthsAndTypes > 0:
221   - remaining_SiteDepthsAndTypes -= self.consume_FormObjectDepthTypeCount()
222   - self.unfreeze_pad()
223   - for i in range(CountOfSites):
224   - self.consume_OleSiteConcreteControl()
225   -
226   - def consume_MorphDataControl(self):
227   - # MorphDataControl: [MS-OFORMS] 2.2.5.1
228   - self.check_values('MorphDataControl (versions)', '<BB', 2, (0, 2))
229   - cbMorphData = self.unpack('<H', 2)
230   - self.freeze()
231   - propmask = MorphDataPropMask(self.unpack('<Q', 8))
  231 + remaining_SiteDepthsAndTypes -= consume_FormObjectDepthTypeCount(stream)
  232 + for i in range(CountOfSites):
  233 + yield consume_OleSiteConcreteControl(stream)
  234 +
  235 +def consume_MorphDataControl(stream):
  236 + # MorphDataControl: [MS-OFORMS] 2.2.5.1
  237 + stream.check_values('MorphDataControl (versions)', '<BB', 2, (0, 2))
  238 + cbMorphData = stream.unpack('<H', 2)
  239 + with stream.will_jump_to(cbMorphData):
  240 + propmask = MorphDataPropMask(stream.unpack('<Q', 8))
232 241 # MorphDataDataBlock: [MS-OFORMS] 2.2.5.3
233 242 for prop in ['fVariousPropertyBits', 'fBackColor', 'fForeColor', 'fMaxLength']:
234 243 if propmask[prop]:
235   - self.read(4)
236   - self.freeze()
237   - for prop in ['fBorderStyle', 'fScrollBars', 'fDisplayStyle', 'fMousePointer']:
238   - if propmask[prop]:
239   - self.read(1)
240   - self.unfreeze_pad()
  244 + stream.read(4)
  245 + with stream.will_pad():
  246 + for prop in ['fBorderStyle', 'fScrollBars', 'fDisplayStyle', 'fMousePointer']:
  247 + if propmask[prop]:
  248 + stream.read(1)
241 249 # PasswordChar, BoundColumn, TextColumn, ColumnCount, and ListRows are 2B + pad = 4B
242 250 # ListWidth is 4B + pad = 4B
243 251 for prop in ['fPasswordChar', 'fListWidth', 'fBoundColumn', 'fTextColumn', 'fColumnCount',
244 252 'fListRows']:
245 253 if propmask[prop]:
246   - self.read(4)
247   - self.freeze()
248   - if propmask.fcColumnInfo:
249   - self.read(2)
250   - for prop in ['fMatchEntry', 'fListStyle', 'fShowDropButtonWhen', 'fDropButtonStyle',
251   - 'fMultiSelect']:
252   - if propmask[prop]:
253   - self.read(1)
254   - self.unfreeze_pad()
  254 + stream.read(4)
  255 + with stream.will_pad():
  256 + if propmask.fcColumnInfo:
  257 + stream.read(2)
  258 + for prop in ['fMatchEntry', 'fListStyle', 'fShowDropButtonWhen', 'fDropButtonStyle',
  259 + 'fMultiSelect']:
  260 + if propmask[prop]:
  261 + stream.read(1)
255 262 if propmask.fValue:
256   - value_size = self.consume_CountOfBytesWithCompressionFlag()
  263 + value_size = consume_CountOfBytesWithCompressionFlag(stream)
257 264 else:
258 265 value_size = 0
259 266 # Caption, PicturePosition, BorderColor, SpecialEffect, GroupName are 4B + pad = 4B
... ... @@ -261,27 +268,24 @@ class OleUserFormParser(object):
261 268 for prop in ['fCaption', 'fPicturePosition', 'fBorderColor', 'fSpecialEffect',
262 269 'fMouseIcon', 'fPicture', 'fAccelerator', 'fGroupName']:
263 270 if propmask[prop]:
264   - self.read(4)
  271 + stream.read(4)
265 272 # MorphDataExtraDataBlock: [MS-OFORMS] 2.2.5.4
266   - self.read(8)
267   - value = self.read(value_size)
268   - self.unfreeze(cbMorphData)
269   - # MorphDataStreamData: [MS-OFORMS] 2.2.5.5
270   - if propmask.fMouseIcon:
271   - self.consume_GuidAndPicture()
272   - if propmask.fPicture:
273   - self.consume_GuidAndPicture()
274   - self.consume_TextProps()
275   - return value
276   -
277   - def consume_stored_data(self):
278   - for var in self.variables:
279   - if var['ClsidCacheIndex'] != 23:
280   - raise OleFormParsingError('Unsupported stored type: {0}'.format(str(var['ClsidCacheIndex'])))
281   - var['value'] = self.consume_MorphDataControl()
282   -
283   -def OleFormVariables(ole_file, stream_dir):
284   - control_stream = ole_file.openstream('/'.join(stream_dir + ['f']))
285   - data_stream = ole_file.openstream('/'.join(stream_dir + ['o']))
286   - form = OleUserFormParser(control_stream, data_stream)
287   - return form.variables
  273 + stream.read(8)
  274 + value = stream.read(value_size)
  275 + # MorphDataStreamData: [MS-OFORMS] 2.2.5.5
  276 + if propmask.fMouseIcon:
  277 + consume_GuidAndPicture(stream)
  278 + if propmask.fPicture:
  279 + consume_GuidAndPicture(stream)
  280 + consume_TextProps(stream)
  281 + return value
  282 +
  283 +def extract_OleFormVariables(ole_file, stream_dir):
  284 + control = ExtendedStream.open(ole_file, '/'.join(stream_dir + ['f']))
  285 + variables = list(consume_FormControl(control))
  286 + data = ExtendedStream.open(ole_file, '/'.join(stream_dir + ['o']))
  287 + for var in variables:
  288 + if var['ClsidCacheIndex'] != 23:
  289 + raise OleFormParsingError('Unsupported stored type: {0}'.format(str(var['ClsidCacheIndex'])))
  290 + var['value'] = consume_MorphDataControl(data)
  291 + return variables
... ...
oletools/olevba.py
... ... @@ -254,7 +254,7 @@ except ImportError:
254 254 + "see http://codespeak.net/lxml " \
255 255 + "or http://effbot.org/zone/element-index.htm")
256 256  
257   -from oleform import OleFormVariables
  257 +from oleform import extract_OleFormVariables
258 258  
259 259 import thirdparty.olefile as olefile
260 260 from thirdparty.prettytable import prettytable
... ... @@ -2924,7 +2924,7 @@ class VBA_Parser(object):
2924 2924 self.find_vba_forms()
2925 2925 ole = self.ole_file
2926 2926 for form_storage in self.vba_forms:
2927   - for variable in OleFormVariables(ole, form_storage):
  2927 + for variable in extract_OleFormVariables(ole, form_storage):
2928 2928 yield (self.filename, '/'.join(form_storage), variable)
2929 2929  
2930 2930 def close(self):
... ...