Commit f8664a4f7167c1d98bce36d83140269f8cb32480

Authored by Vincent Brillault
1 parent a5151358

Oleform: rewrite padding handling to support laziness

Padded structures are in fact lazily padded: the pad is only applied right
before a field that needs alignment is actually read, not immediately. For
example, consider the following struct:
```
| a (1B, opt) | b (1B, opt) | c (1B, opt) | d (1B, opt) |
| ... pad ... |
| e (4B, opt) |
| f (2B, opt) | ... pad ...|
```
If only a and f are present (none of the other optional fields are used),
the actual content will be: | a (1B) | pad (1B) | f (2B) |

The previous implementation instead expected:
| a (1B) | pad (3B) | f (2B) | pad (2B) |
which shifted the rest of the structure and made parsing fail.
Showing 1 changed file with 77 additions and 50 deletions
oletools/oleform.py
... ... @@ -104,6 +104,8 @@ class ExtendedStream(object):
104 104 self._jumps = []
105 105 self._stream = stream
106 106 self._path = path
  107 + self._padding = False
  108 + self._pad_start = 0
107 109  
108 110 @classmethod
109 111 def open(cls, ole_file, path):
... ... @@ -114,31 +116,55 @@ class ExtendedStream(object):
114 116 # print('declared size: %d' % ole_file.get_size(path))
115 117 return cls(stream, path)
116 118  
117   - def read(self, size):
  119 + def _read(self, size):
118 120 self._pos += size
119 121 return self._stream.read(size)
120 122  
  123 + def _pad(self, start, size=4):
  124 + offset = (self._pos - start) % size
  125 + if offset:
  126 + self._read(size - offset)
  127 +
  128 + def read(self, size):
  129 + if self._padding:
  130 + self._pad(self._pad_start, size)
  131 + return self._read(size)
  132 +
121 133 def will_jump_to(self, size):
122   - self._next_jump = (True, size)
  134 + self._next_jump = ('jump', (self._pos, size))
123 135 return self
124 136  
125   - def will_pad(self, pad=4):
126   - self._next_jump = (False, pad)
  137 + def will_pad(self):
  138 + self._next_jump = ('pad', self._pos)
  139 + return self
  140 +
  141 + def padded_struct(self):
  142 + self._next_jump = ('padded', (self._padding, self._pad_start))
  143 + self._padding = True
  144 + self._pad_start = self._pos
127 145 return self
128 146  
129 147 def __enter__(self):
130   - (jump_type, size) = self._next_jump
131   - self._jumps.append((self._pos, jump_type, size))
  148 + assert(self._next_jump)
  149 + self._jumps.append(self._next_jump)
  150 + self._next_jump = None
132 151  
133 152 def __exit__(self, exc_type, exc_value, traceback):
134 153 if exc_type is None:
135   - (start, jump_type, size) = self._jumps.pop()
136   - if jump_type:
137   - self.read(size - (self._pos - start))
138   - else:
139   - align = (self._pos - start) % size
140   - if align:
141   - self.read(size - align)
  154 + (jump_type, data) = self._jumps.pop()
  155 + if jump_type == 'jump':
  156 + (start, size) = data
  157 + consummed = self._pos - start
  158 + if consummed > size:
  159 + self.raise_error('Bad jump: too much read ({0} > {1})'.format(consummed, size))
  160 + self.read(size - consummed)
  161 + elif jump_type == 'pad':
  162 + self._pad(data)
  163 + elif jump_type == 'padded':
  164 + (prev_padding, prev_pad_start) = data
  165 + self._pad(self._pad_start)
  166 + self._padding = prev_padding
  167 + self._pad_start = prev_pad_start
142 168  
143 169 def unpacks(self, format, size):
144 170 return struct.unpack(format, self.read(size))
... ... @@ -219,28 +245,27 @@ def consume_OleSiteConcreteControl(stream):
219 245 with stream.will_jump_to(cbSite):
220 246 propmask = SitePropMask(stream.unpack('<L', 4))
221 247 # SiteDataBlock: [MS-OFORMS] 2.2.10.12.3
222   - name_len = tag_len = id = 0
223   - if propmask.fName:
224   - name_len = consume_CountOfBytesWithCompressionFlag(stream)
225   - if propmask.fTag:
226   - tag_len = consume_CountOfBytesWithCompressionFlag(stream)
227   - if propmask.fID:
228   - id = stream.unpack('<L', 4)
229   - for prop in ['fHelpContextID', 'fBitFlags', 'fObjectStreamSize']:
230   - if propmask[prop]:
231   - stream.read(4)
232   - tabindex = ClsidCacheIndex = 0
233   - with stream.will_pad():
  248 + with stream.padded_struct():
  249 + name_len = tag_len = id = 0
  250 + if propmask.fName:
  251 + name_len = consume_CountOfBytesWithCompressionFlag(stream)
  252 + if propmask.fTag:
  253 + tag_len = consume_CountOfBytesWithCompressionFlag(stream)
  254 + if propmask.fID:
  255 + id = stream.unpack('<L', 4)
  256 + for prop in ['fHelpContextID', 'fBitFlags', 'fObjectStreamSize']:
  257 + if propmask[prop]:
  258 + stream.read(4)
  259 + tabindex = ClsidCacheIndex = 0
234 260 if propmask.fTabIndex:
235 261 tabindex = stream.unpack('<H', 2)
236 262 if propmask.fClsidCacheIndex:
237 263 ClsidCacheIndex = stream.unpack('<H', 2)
238 264 if propmask.fGroupID:
239 265 stream.read(2)
240   - # For the next 4 entries, the documentation adds padding, but it should already be aligned??
241   - for prop in ['fControlTipText', 'fRuntimeLicKey', 'fControlSource', 'fRowSource']:
242   - if propmask[prop]:
243   - stream.read(4)
  266 + for prop in ['fControlTipText', 'fRuntimeLicKey', 'fControlSource', 'fRowSource']:
  267 + if propmask[prop]:
  268 + stream.read(4)
244 269 # SiteExtraDataBlock: [MS-OFORMS] 2.2.10.12.4
245 270 name = stream.read(name_len)
246 271 tag = stream.read(tag_len)
... ... @@ -291,35 +316,37 @@ def consume_MorphDataControl(stream):
291 316 with stream.will_jump_to(cbMorphData):
292 317 propmask = MorphDataPropMask(stream.unpack('<Q', 8))
293 318 # MorphDataDataBlock: [MS-OFORMS] 2.2.5.3
294   - for prop in ['fVariousPropertyBits', 'fBackColor', 'fForeColor', 'fMaxLength']:
295   - if propmask[prop]:
296   - stream.read(4)
297   - with stream.will_pad():
  319 + with stream.padded_struct():
  320 + for prop in ['fVariousPropertyBits', 'fBackColor', 'fForeColor', 'fMaxLength']:
  321 + if propmask[prop]:
  322 + stream.read(4)
298 323 for prop in ['fBorderStyle', 'fScrollBars', 'fDisplayStyle', 'fMousePointer']:
299 324 if propmask[prop]:
300 325 stream.read(1)
301   - # PasswordChar, BoundColumn, TextColumn, ColumnCount, and ListRows are 2B + pad = 4B
302   - # ListWidth is 4B + pad = 4B
303   - for prop in ['fPasswordChar', 'fListWidth', 'fBoundColumn', 'fTextColumn', 'fColumnCount',
304   - 'fListRows']:
305   - if propmask[prop]:
  326 + if propmask['fPasswordChar']:
  327 + stream.read(2)
  328 + if propmask['fListWidth']:
306 329 stream.read(4)
307   - with stream.will_pad():
  330 + for prop in ['fBoundColumn', 'fTextColumn', 'fColumnCount', 'fListRows']:
  331 + if propmask[prop]:
  332 + stream.read(2)
308 333 if propmask.fcColumnInfo:
309 334 stream.read(2)
310   - for prop in ['fMatchEntry', 'fListStyle', 'fShowDropButtonWhen', 'fDropButtonStyle',
311   - 'fMultiSelect']:
  335 + for prop in ['fMatchEntry', 'fListStyle', 'fShowDropButtonWhen',
  336 + 'fDropButtonStyle', 'fMultiSelect']:
312 337 if propmask[prop]:
313 338 stream.read(1)
314   - if propmask.fValue:
315   - value_size = consume_CountOfBytesWithCompressionFlag(stream)
316   - else:
317   - value_size = 0
318   - # Caption, PicturePosition, BorderColor, SpecialEffect, GroupName are 4B + pad = 4B
319   - # MouseIcon, Picture, Accelerator are 2B + pad = 4B
320   - for prop in ['fCaption', 'fPicturePosition', 'fBorderColor', 'fSpecialEffect',
321   - 'fMouseIcon', 'fPicture', 'fAccelerator', 'fGroupName']:
322   - if propmask[prop]:
  339 + if propmask.fValue:
  340 + value_size = consume_CountOfBytesWithCompressionFlag(stream)
  341 + else:
  342 + value_size = 0
  343 + for prop in ['fCaption', 'fPicturePosition', 'fBorderColor', 'fSpecialEffect']:
  344 + if propmask[prop]:
  345 + stream.read(4)
  346 + for prop in ['fMouseIcon', 'fPicture', 'fAccelerator']:
  347 + if propmask[prop]:
  348 + stream.read(2)
  349 + if propmask['fGroupName']:
323 350 stream.read(4)
324 351 # MorphDataExtraDataBlock: [MS-OFORMS] 2.2.5.4
325 352 stream.read(8)
... ...