Commit f8664a4f7167c1d98bce36d83140269f8cb32480

Authored by Vincent Brillault
1 parent a5151358

Oleform: rewrite padding handling to support laziness

Padded structures are in fact lazily padded: the pad is not applied immediately,
but only when needed, i.e. right before the next field that is actually present
and requires alignment. For example, consider the following struct:
```
| a (1B, opt) | b (1B, opt) | c (1B, opt) | d (1B, opt) |
| ... pad ... |
| e (4B, opt) |
| f (2B, opt) | ... pad ...|
```
If only a and f are present (none of the other optional parts are used), the
actual content will be: | a (1B) | pad (1B) | f (2B) |

The previous implementation instead expected:
| a (1B) | pad (3B) | f (2B) | pad (2B) |
which shifted every field after the first pad, so parsing the structure failed.
Showing 1 changed file with 77 additions and 50 deletions
oletools/oleform.py
@@ -104,6 +104,8 @@ class ExtendedStream(object): @@ -104,6 +104,8 @@ class ExtendedStream(object):
104 self._jumps = [] 104 self._jumps = []
105 self._stream = stream 105 self._stream = stream
106 self._path = path 106 self._path = path
  107 + self._padding = False
  108 + self._pad_start = 0
107 109
108 @classmethod 110 @classmethod
109 def open(cls, ole_file, path): 111 def open(cls, ole_file, path):
@@ -114,31 +116,55 @@ class ExtendedStream(object): @@ -114,31 +116,55 @@ class ExtendedStream(object):
114 # print('declared size: %d' % ole_file.get_size(path)) 116 # print('declared size: %d' % ole_file.get_size(path))
115 return cls(stream, path) 117 return cls(stream, path)
116 118
117 - def read(self, size): 119 + def _read(self, size):
118 self._pos += size 120 self._pos += size
119 return self._stream.read(size) 121 return self._stream.read(size)
120 122
  123 + def _pad(self, start, size=4):
  124 + offset = (self._pos - start) % size
  125 + if offset:
  126 + self._read(size - offset)
  127 +
  128 + def read(self, size):
  129 + if self._padding:
  130 + self._pad(self._pad_start, size)
  131 + return self._read(size)
  132 +
121 def will_jump_to(self, size): 133 def will_jump_to(self, size):
122 - self._next_jump = (True, size) 134 + self._next_jump = ('jump', (self._pos, size))
123 return self 135 return self
124 136
125 - def will_pad(self, pad=4):  
126 - self._next_jump = (False, pad) 137 + def will_pad(self):
  138 + self._next_jump = ('pad', self._pos)
  139 + return self
  140 +
  141 + def padded_struct(self):
  142 + self._next_jump = ('padded', (self._padding, self._pad_start))
  143 + self._padding = True
  144 + self._pad_start = self._pos
127 return self 145 return self
128 146
129 def __enter__(self): 147 def __enter__(self):
130 - (jump_type, size) = self._next_jump  
131 - self._jumps.append((self._pos, jump_type, size)) 148 + assert(self._next_jump)
  149 + self._jumps.append(self._next_jump)
  150 + self._next_jump = None
132 151
133 def __exit__(self, exc_type, exc_value, traceback): 152 def __exit__(self, exc_type, exc_value, traceback):
134 if exc_type is None: 153 if exc_type is None:
135 - (start, jump_type, size) = self._jumps.pop()  
136 - if jump_type:  
137 - self.read(size - (self._pos - start))  
138 - else:  
139 - align = (self._pos - start) % size  
140 - if align:  
141 - self.read(size - align) 154 + (jump_type, data) = self._jumps.pop()
  155 + if jump_type == 'jump':
  156 + (start, size) = data
  157 + consummed = self._pos - start
  158 + if consummed > size:
  159 + self.raise_error('Bad jump: too much read ({0} > {1})'.format(consummed, size))
  160 + self.read(size - consummed)
  161 + elif jump_type == 'pad':
  162 + self._pad(data)
  163 + elif jump_type == 'padded':
  164 + (prev_padding, prev_pad_start) = data
  165 + self._pad(self._pad_start)
  166 + self._padding = prev_padding
  167 + self._pad_start = prev_pad_start
142 168
143 def unpacks(self, format, size): 169 def unpacks(self, format, size):
144 return struct.unpack(format, self.read(size)) 170 return struct.unpack(format, self.read(size))
@@ -219,28 +245,27 @@ def consume_OleSiteConcreteControl(stream): @@ -219,28 +245,27 @@ def consume_OleSiteConcreteControl(stream):
219 with stream.will_jump_to(cbSite): 245 with stream.will_jump_to(cbSite):
220 propmask = SitePropMask(stream.unpack('<L', 4)) 246 propmask = SitePropMask(stream.unpack('<L', 4))
221 # SiteDataBlock: [MS-OFORMS] 2.2.10.12.3 247 # SiteDataBlock: [MS-OFORMS] 2.2.10.12.3
222 - name_len = tag_len = id = 0  
223 - if propmask.fName:  
224 - name_len = consume_CountOfBytesWithCompressionFlag(stream)  
225 - if propmask.fTag:  
226 - tag_len = consume_CountOfBytesWithCompressionFlag(stream)  
227 - if propmask.fID:  
228 - id = stream.unpack('<L', 4)  
229 - for prop in ['fHelpContextID', 'fBitFlags', 'fObjectStreamSize']:  
230 - if propmask[prop]:  
231 - stream.read(4)  
232 - tabindex = ClsidCacheIndex = 0  
233 - with stream.will_pad(): 248 + with stream.padded_struct():
  249 + name_len = tag_len = id = 0
  250 + if propmask.fName:
  251 + name_len = consume_CountOfBytesWithCompressionFlag(stream)
  252 + if propmask.fTag:
  253 + tag_len = consume_CountOfBytesWithCompressionFlag(stream)
  254 + if propmask.fID:
  255 + id = stream.unpack('<L', 4)
  256 + for prop in ['fHelpContextID', 'fBitFlags', 'fObjectStreamSize']:
  257 + if propmask[prop]:
  258 + stream.read(4)
  259 + tabindex = ClsidCacheIndex = 0
234 if propmask.fTabIndex: 260 if propmask.fTabIndex:
235 tabindex = stream.unpack('<H', 2) 261 tabindex = stream.unpack('<H', 2)
236 if propmask.fClsidCacheIndex: 262 if propmask.fClsidCacheIndex:
237 ClsidCacheIndex = stream.unpack('<H', 2) 263 ClsidCacheIndex = stream.unpack('<H', 2)
238 if propmask.fGroupID: 264 if propmask.fGroupID:
239 stream.read(2) 265 stream.read(2)
240 - # For the next 4 entries, the documentation adds padding, but it should already be aligned??  
241 - for prop in ['fControlTipText', 'fRuntimeLicKey', 'fControlSource', 'fRowSource']:  
242 - if propmask[prop]:  
243 - stream.read(4) 266 + for prop in ['fControlTipText', 'fRuntimeLicKey', 'fControlSource', 'fRowSource']:
  267 + if propmask[prop]:
  268 + stream.read(4)
244 # SiteExtraDataBlock: [MS-OFORMS] 2.2.10.12.4 269 # SiteExtraDataBlock: [MS-OFORMS] 2.2.10.12.4
245 name = stream.read(name_len) 270 name = stream.read(name_len)
246 tag = stream.read(tag_len) 271 tag = stream.read(tag_len)
@@ -291,35 +316,37 @@ def consume_MorphDataControl(stream): @@ -291,35 +316,37 @@ def consume_MorphDataControl(stream):
291 with stream.will_jump_to(cbMorphData): 316 with stream.will_jump_to(cbMorphData):
292 propmask = MorphDataPropMask(stream.unpack('<Q', 8)) 317 propmask = MorphDataPropMask(stream.unpack('<Q', 8))
293 # MorphDataDataBlock: [MS-OFORMS] 2.2.5.3 318 # MorphDataDataBlock: [MS-OFORMS] 2.2.5.3
294 - for prop in ['fVariousPropertyBits', 'fBackColor', 'fForeColor', 'fMaxLength']:  
295 - if propmask[prop]:  
296 - stream.read(4)  
297 - with stream.will_pad(): 319 + with stream.padded_struct():
  320 + for prop in ['fVariousPropertyBits', 'fBackColor', 'fForeColor', 'fMaxLength']:
  321 + if propmask[prop]:
  322 + stream.read(4)
298 for prop in ['fBorderStyle', 'fScrollBars', 'fDisplayStyle', 'fMousePointer']: 323 for prop in ['fBorderStyle', 'fScrollBars', 'fDisplayStyle', 'fMousePointer']:
299 if propmask[prop]: 324 if propmask[prop]:
300 stream.read(1) 325 stream.read(1)
301 - # PasswordChar, BoundColumn, TextColumn, ColumnCount, and ListRows are 2B + pad = 4B  
302 - # ListWidth is 4B + pad = 4B  
303 - for prop in ['fPasswordChar', 'fListWidth', 'fBoundColumn', 'fTextColumn', 'fColumnCount',  
304 - 'fListRows']:  
305 - if propmask[prop]: 326 + if propmask['fPasswordChar']:
  327 + stream.read(2)
  328 + if propmask['fListWidth']:
306 stream.read(4) 329 stream.read(4)
307 - with stream.will_pad(): 330 + for prop in ['fBoundColumn', 'fTextColumn', 'fColumnCount', 'fListRows']:
  331 + if propmask[prop]:
  332 + stream.read(2)
308 if propmask.fcColumnInfo: 333 if propmask.fcColumnInfo:
309 stream.read(2) 334 stream.read(2)
310 - for prop in ['fMatchEntry', 'fListStyle', 'fShowDropButtonWhen', 'fDropButtonStyle',  
311 - 'fMultiSelect']: 335 + for prop in ['fMatchEntry', 'fListStyle', 'fShowDropButtonWhen',
  336 + 'fDropButtonStyle', 'fMultiSelect']:
312 if propmask[prop]: 337 if propmask[prop]:
313 stream.read(1) 338 stream.read(1)
314 - if propmask.fValue:  
315 - value_size = consume_CountOfBytesWithCompressionFlag(stream)  
316 - else:  
317 - value_size = 0  
318 - # Caption, PicturePosition, BorderColor, SpecialEffect, GroupName are 4B + pad = 4B  
319 - # MouseIcon, Picture, Accelerator are 2B + pad = 4B  
320 - for prop in ['fCaption', 'fPicturePosition', 'fBorderColor', 'fSpecialEffect',  
321 - 'fMouseIcon', 'fPicture', 'fAccelerator', 'fGroupName']:  
322 - if propmask[prop]: 339 + if propmask.fValue:
  340 + value_size = consume_CountOfBytesWithCompressionFlag(stream)
  341 + else:
  342 + value_size = 0
  343 + for prop in ['fCaption', 'fPicturePosition', 'fBorderColor', 'fSpecialEffect']:
  344 + if propmask[prop]:
  345 + stream.read(4)
  346 + for prop in ['fMouseIcon', 'fPicture', 'fAccelerator']:
  347 + if propmask[prop]:
  348 + stream.read(2)
  349 + if propmask['fGroupName']:
323 stream.read(4) 350 stream.read(4)
324 # MorphDataExtraDataBlock: [MS-OFORMS] 2.2.5.4 351 # MorphDataExtraDataBlock: [MS-OFORMS] 2.2.5.4
325 stream.read(8) 352 stream.read(8)