Commit f8664a4f7167c1d98bce36d83140269f8cb32480
1 parent
a5151358
Oleform: rewrite padding handling to support laziness
Padded structures are in fact lazily padded: the pad is only applied when it is needed, not immediately. For example, consider the following struct:

```
| a (1B, opt) | b (1B, opt) | c (1B, opt) | d (1B, opt) |
| ... pad ... |
| e (4B, opt) |
| f (2B, opt) | ... pad ... |
```

If only a and f are present (all other optional parts are unused), the actual content will be:

```
| a (1B) | pad (1B) | f (2B) |
```

The previous implementation resulted in:

```
| a (1B) | pad (3B) | f (2B) | pad (2B) |
```

which shifted the whole structure and failed...
Showing
1 changed file
with
77 additions
and
50 deletions
oletools/oleform.py
| @@ -104,6 +104,8 @@ class ExtendedStream(object): | @@ -104,6 +104,8 @@ class ExtendedStream(object): | ||
| 104 | self._jumps = [] | 104 | self._jumps = [] |
| 105 | self._stream = stream | 105 | self._stream = stream |
| 106 | self._path = path | 106 | self._path = path |
| 107 | + self._padding = False | ||
| 108 | + self._pad_start = 0 | ||
| 107 | 109 | ||
| 108 | @classmethod | 110 | @classmethod |
| 109 | def open(cls, ole_file, path): | 111 | def open(cls, ole_file, path): |
| @@ -114,31 +116,55 @@ class ExtendedStream(object): | @@ -114,31 +116,55 @@ class ExtendedStream(object): | ||
| 114 | # print('declared size: %d' % ole_file.get_size(path)) | 116 | # print('declared size: %d' % ole_file.get_size(path)) |
| 115 | return cls(stream, path) | 117 | return cls(stream, path) |
| 116 | 118 | ||
| 117 | - def read(self, size): | 119 | + def _read(self, size): |
| 118 | self._pos += size | 120 | self._pos += size |
| 119 | return self._stream.read(size) | 121 | return self._stream.read(size) |
| 120 | 122 | ||
| 123 | + def _pad(self, start, size=4): | ||
| 124 | + offset = (self._pos - start) % size | ||
| 125 | + if offset: | ||
| 126 | + self._read(size - offset) | ||
| 127 | + | ||
| 128 | + def read(self, size): | ||
| 129 | + if self._padding: | ||
| 130 | + self._pad(self._pad_start, size) | ||
| 131 | + return self._read(size) | ||
| 132 | + | ||
| 121 | def will_jump_to(self, size): | 133 | def will_jump_to(self, size): |
| 122 | - self._next_jump = (True, size) | 134 | + self._next_jump = ('jump', (self._pos, size)) |
| 123 | return self | 135 | return self |
| 124 | 136 | ||
| 125 | - def will_pad(self, pad=4): | ||
| 126 | - self._next_jump = (False, pad) | 137 | + def will_pad(self): |
| 138 | + self._next_jump = ('pad', self._pos) | ||
| 139 | + return self | ||
| 140 | + | ||
| 141 | + def padded_struct(self): | ||
| 142 | + self._next_jump = ('padded', (self._padding, self._pad_start)) | ||
| 143 | + self._padding = True | ||
| 144 | + self._pad_start = self._pos | ||
| 127 | return self | 145 | return self |
| 128 | 146 | ||
| 129 | def __enter__(self): | 147 | def __enter__(self): |
| 130 | - (jump_type, size) = self._next_jump | ||
| 131 | - self._jumps.append((self._pos, jump_type, size)) | 148 | + assert(self._next_jump) |
| 149 | + self._jumps.append(self._next_jump) | ||
| 150 | + self._next_jump = None | ||
| 132 | 151 | ||
| 133 | def __exit__(self, exc_type, exc_value, traceback): | 152 | def __exit__(self, exc_type, exc_value, traceback): |
| 134 | if exc_type is None: | 153 | if exc_type is None: |
| 135 | - (start, jump_type, size) = self._jumps.pop() | ||
| 136 | - if jump_type: | ||
| 137 | - self.read(size - (self._pos - start)) | ||
| 138 | - else: | ||
| 139 | - align = (self._pos - start) % size | ||
| 140 | - if align: | ||
| 141 | - self.read(size - align) | 154 | + (jump_type, data) = self._jumps.pop() |
| 155 | + if jump_type == 'jump': | ||
| 156 | + (start, size) = data | ||
| 157 | + consummed = self._pos - start | ||
| 158 | + if consummed > size: | ||
| 159 | + self.raise_error('Bad jump: too much read ({0} > {1})'.format(consummed, size)) | ||
| 160 | + self.read(size - consummed) | ||
| 161 | + elif jump_type == 'pad': | ||
| 162 | + self._pad(data) | ||
| 163 | + elif jump_type == 'padded': | ||
| 164 | + (prev_padding, prev_pad_start) = data | ||
| 165 | + self._pad(self._pad_start) | ||
| 166 | + self._padding = prev_padding | ||
| 167 | + self._pad_start = prev_pad_start | ||
| 142 | 168 | ||
| 143 | def unpacks(self, format, size): | 169 | def unpacks(self, format, size): |
| 144 | return struct.unpack(format, self.read(size)) | 170 | return struct.unpack(format, self.read(size)) |
| @@ -219,28 +245,27 @@ def consume_OleSiteConcreteControl(stream): | @@ -219,28 +245,27 @@ def consume_OleSiteConcreteControl(stream): | ||
| 219 | with stream.will_jump_to(cbSite): | 245 | with stream.will_jump_to(cbSite): |
| 220 | propmask = SitePropMask(stream.unpack('<L', 4)) | 246 | propmask = SitePropMask(stream.unpack('<L', 4)) |
| 221 | # SiteDataBlock: [MS-OFORMS] 2.2.10.12.3 | 247 | # SiteDataBlock: [MS-OFORMS] 2.2.10.12.3 |
| 222 | - name_len = tag_len = id = 0 | ||
| 223 | - if propmask.fName: | ||
| 224 | - name_len = consume_CountOfBytesWithCompressionFlag(stream) | ||
| 225 | - if propmask.fTag: | ||
| 226 | - tag_len = consume_CountOfBytesWithCompressionFlag(stream) | ||
| 227 | - if propmask.fID: | ||
| 228 | - id = stream.unpack('<L', 4) | ||
| 229 | - for prop in ['fHelpContextID', 'fBitFlags', 'fObjectStreamSize']: | ||
| 230 | - if propmask[prop]: | ||
| 231 | - stream.read(4) | ||
| 232 | - tabindex = ClsidCacheIndex = 0 | ||
| 233 | - with stream.will_pad(): | 248 | + with stream.padded_struct(): |
| 249 | + name_len = tag_len = id = 0 | ||
| 250 | + if propmask.fName: | ||
| 251 | + name_len = consume_CountOfBytesWithCompressionFlag(stream) | ||
| 252 | + if propmask.fTag: | ||
| 253 | + tag_len = consume_CountOfBytesWithCompressionFlag(stream) | ||
| 254 | + if propmask.fID: | ||
| 255 | + id = stream.unpack('<L', 4) | ||
| 256 | + for prop in ['fHelpContextID', 'fBitFlags', 'fObjectStreamSize']: | ||
| 257 | + if propmask[prop]: | ||
| 258 | + stream.read(4) | ||
| 259 | + tabindex = ClsidCacheIndex = 0 | ||
| 234 | if propmask.fTabIndex: | 260 | if propmask.fTabIndex: |
| 235 | tabindex = stream.unpack('<H', 2) | 261 | tabindex = stream.unpack('<H', 2) |
| 236 | if propmask.fClsidCacheIndex: | 262 | if propmask.fClsidCacheIndex: |
| 237 | ClsidCacheIndex = stream.unpack('<H', 2) | 263 | ClsidCacheIndex = stream.unpack('<H', 2) |
| 238 | if propmask.fGroupID: | 264 | if propmask.fGroupID: |
| 239 | stream.read(2) | 265 | stream.read(2) |
| 240 | - # For the next 4 entries, the documentation adds padding, but it should already be aligned?? | ||
| 241 | - for prop in ['fControlTipText', 'fRuntimeLicKey', 'fControlSource', 'fRowSource']: | ||
| 242 | - if propmask[prop]: | ||
| 243 | - stream.read(4) | 266 | + for prop in ['fControlTipText', 'fRuntimeLicKey', 'fControlSource', 'fRowSource']: |
| 267 | + if propmask[prop]: | ||
| 268 | + stream.read(4) | ||
| 244 | # SiteExtraDataBlock: [MS-OFORMS] 2.2.10.12.4 | 269 | # SiteExtraDataBlock: [MS-OFORMS] 2.2.10.12.4 |
| 245 | name = stream.read(name_len) | 270 | name = stream.read(name_len) |
| 246 | tag = stream.read(tag_len) | 271 | tag = stream.read(tag_len) |
| @@ -291,35 +316,37 @@ def consume_MorphDataControl(stream): | @@ -291,35 +316,37 @@ def consume_MorphDataControl(stream): | ||
| 291 | with stream.will_jump_to(cbMorphData): | 316 | with stream.will_jump_to(cbMorphData): |
| 292 | propmask = MorphDataPropMask(stream.unpack('<Q', 8)) | 317 | propmask = MorphDataPropMask(stream.unpack('<Q', 8)) |
| 293 | # MorphDataDataBlock: [MS-OFORMS] 2.2.5.3 | 318 | # MorphDataDataBlock: [MS-OFORMS] 2.2.5.3 |
| 294 | - for prop in ['fVariousPropertyBits', 'fBackColor', 'fForeColor', 'fMaxLength']: | ||
| 295 | - if propmask[prop]: | ||
| 296 | - stream.read(4) | ||
| 297 | - with stream.will_pad(): | 319 | + with stream.padded_struct(): |
| 320 | + for prop in ['fVariousPropertyBits', 'fBackColor', 'fForeColor', 'fMaxLength']: | ||
| 321 | + if propmask[prop]: | ||
| 322 | + stream.read(4) | ||
| 298 | for prop in ['fBorderStyle', 'fScrollBars', 'fDisplayStyle', 'fMousePointer']: | 323 | for prop in ['fBorderStyle', 'fScrollBars', 'fDisplayStyle', 'fMousePointer']: |
| 299 | if propmask[prop]: | 324 | if propmask[prop]: |
| 300 | stream.read(1) | 325 | stream.read(1) |
| 301 | - # PasswordChar, BoundColumn, TextColumn, ColumnCount, and ListRows are 2B + pad = 4B | ||
| 302 | - # ListWidth is 4B + pad = 4B | ||
| 303 | - for prop in ['fPasswordChar', 'fListWidth', 'fBoundColumn', 'fTextColumn', 'fColumnCount', | ||
| 304 | - 'fListRows']: | ||
| 305 | - if propmask[prop]: | 326 | + if propmask['fPasswordChar']: |
| 327 | + stream.read(2) | ||
| 328 | + if propmask['fListWidth']: | ||
| 306 | stream.read(4) | 329 | stream.read(4) |
| 307 | - with stream.will_pad(): | 330 | + for prop in ['fBoundColumn', 'fTextColumn', 'fColumnCount', 'fListRows']: |
| 331 | + if propmask[prop]: | ||
| 332 | + stream.read(2) | ||
| 308 | if propmask.fcColumnInfo: | 333 | if propmask.fcColumnInfo: |
| 309 | stream.read(2) | 334 | stream.read(2) |
| 310 | - for prop in ['fMatchEntry', 'fListStyle', 'fShowDropButtonWhen', 'fDropButtonStyle', | ||
| 311 | - 'fMultiSelect']: | 335 | + for prop in ['fMatchEntry', 'fListStyle', 'fShowDropButtonWhen', |
| 336 | + 'fDropButtonStyle', 'fMultiSelect']: | ||
| 312 | if propmask[prop]: | 337 | if propmask[prop]: |
| 313 | stream.read(1) | 338 | stream.read(1) |
| 314 | - if propmask.fValue: | ||
| 315 | - value_size = consume_CountOfBytesWithCompressionFlag(stream) | ||
| 316 | - else: | ||
| 317 | - value_size = 0 | ||
| 318 | - # Caption, PicturePosition, BorderColor, SpecialEffect, GroupName are 4B + pad = 4B | ||
| 319 | - # MouseIcon, Picture, Accelerator are 2B + pad = 4B | ||
| 320 | - for prop in ['fCaption', 'fPicturePosition', 'fBorderColor', 'fSpecialEffect', | ||
| 321 | - 'fMouseIcon', 'fPicture', 'fAccelerator', 'fGroupName']: | ||
| 322 | - if propmask[prop]: | 339 | + if propmask.fValue: |
| 340 | + value_size = consume_CountOfBytesWithCompressionFlag(stream) | ||
| 341 | + else: | ||
| 342 | + value_size = 0 | ||
| 343 | + for prop in ['fCaption', 'fPicturePosition', 'fBorderColor', 'fSpecialEffect']: | ||
| 344 | + if propmask[prop]: | ||
| 345 | + stream.read(4) | ||
| 346 | + for prop in ['fMouseIcon', 'fPicture', 'fAccelerator']: | ||
| 347 | + if propmask[prop]: | ||
| 348 | + stream.read(2) | ||
| 349 | + if propmask['fGroupName']: | ||
| 323 | stream.read(4) | 350 | stream.read(4) |
| 324 | # MorphDataExtraDataBlock: [MS-OFORMS] 2.2.5.4 | 351 | # MorphDataExtraDataBlock: [MS-OFORMS] 2.2.5.4 |
| 325 | stream.read(8) | 352 | stream.read(8) |