Commit e90e0e5ad120b57419b7e06975fc458f48831b59
1 parent
470d0806
ppt records: compensate wrong size in CurrentUserAtom
This compensates for an inconsistency that is probably just a bug in some ppt versions: the size attribute of the CurrentUserAtom "forgets" about the optional unicode user name, so the name shows up as unexplained data behind the record (where nothing should be).
Showing
2 changed files
with
35 additions
and
1 deletions
oletools/ppt_record_parser.py
| @@ -214,6 +214,20 @@ class PptRecordCurrentUser(PptRecord): | @@ -214,6 +214,20 @@ class PptRecordCurrentUser(PptRecord): | ||
def is_document_encrypted(self):
    """ Return True if header_token equals 0xF3D1C4DF.

    That token value is what this method treats as the marker for an
    encrypted document; any other value yields False.
    """
    encrypted_token = 0xF3D1C4DF
    return self.header_token == encrypted_token
| 216 | 216 | ||
def read_some_more(self, stream):
    """ Check if the unicode user name comes in stream after the record.

    Some ppt files do not count the optional unicode user name in the size
    of this record, so the name ends up *behind* the record data. Reading
    on is safe here since no data should come after this record.

    If the unicode user name has not been set yet and exactly
    2 * len_user_name bytes follow, these bytes are decoded as utf-16 and
    stored in self.unicode_user_name. Any other non-empty trailing data is
    reported with a warning. Nothing is reported if no data follows.

    :param stream: stream the record was read from; up to
                   3 * len_user_name bytes are consumed from it
    :returns: None
    """
    expected_size = 2 * self.len_user_name
    # limit data to read: a bit more than expected, so that data which is
    # too long to be the user name can be told apart from the name itself
    more_data = stream.read(3 * self.len_user_name)

    if self.unicode_user_name is None and len(more_data) == expected_size:
        self.unicode_user_name = more_data.decode('utf-16')
        logging.debug('found unicode user name BEHIND current user atom')
    elif more_data:
        # only complain if there really is something behind the record
        logging.warning('Unexplained data of size {0} in "Current User" '
                        'stream'.format(len(more_data)))
| 230 | + | ||
| 217 | 231 | ||
| 218 | # types of relevant records (there are much more than listed here) | 232 | # types of relevant records (there are much more than listed here) |
| 219 | RECORD_TYPES = dict([ | 233 | RECORD_TYPES = dict([ |
oletools/record_base.py
| @@ -203,7 +203,12 @@ class OleRecordStream(object): | @@ -203,7 +203,12 @@ class OleRecordStream(object): | ||
| 203 | else: | 203 | else: |
| 204 | self.stream.seek(rec_size, SEEK_CUR) | 204 | self.stream.seek(rec_size, SEEK_CUR) |
| 205 | data = None | 205 | data = None |
| 206 | - yield rec_clz(rec_type, rec_size, other, pos, data) | 206 | + rec_object = rec_clz(rec_type, rec_size, other, pos, data) |
| 207 | + | ||
| 208 | + # "We are microsoft, we do not have to adhere to our specifications" | ||
| 209 | + rec_object.read_some_more(self.stream) | ||
| 210 | + yield rec_object | ||
| 211 | + | ||
| 207 | 212 | ||
| 208 | def __str__(self): | 213 | def __str__(self): |
| 209 | return '[{0} {1} (type {2}, size {3})' \ | 214 | return '[{0} {1} (type {2}, size {3})' \ |
| @@ -265,6 +270,21 @@ class OleRecordBase(object): | @@ -265,6 +270,21 @@ class OleRecordBase(object): | ||
| 265 | """ | 270 | """ |
| 266 | pass | 271 | pass |
| 267 | 272 | ||
def read_some_more(self, stream):
    """ Read some more data from stream after the end of this record.

    Found that for the CurrentUserAtom in the "Current User" stream of ppt
    files, the last attribute (user name in unicode) is found *behind* the
    record data.

    Do this only if you are certain you will not mess up the following
    records!

    This base implementation does nothing and reads nothing from stream.
    It exists for optional overriding in subclasses (like
    PptRecordCurrentUser, where no record should follow).

    :param stream: stream the record was read from
    :returns: None
    """
    return None
| 287 | + | ||
| 268 | def _type_str(self): | 288 | def _type_str(self): |
| 269 | """ helper for __str__, base implementation """ | 289 | """ helper for __str__, base implementation """ |
| 270 | return '{0} type {1}'.format(self.__class__.__name__, self.type) | 290 | return '{0} type {1}'.format(self.__class__.__name__, self.type) |