Commit e90e0e5ad120b57419b7e06975fc458f48831b59

Authored by Christian Herdtweck
1 parent 470d0806

ppt records: compensate wrong size in CurrentUserAtom

This compensates for an inconsistency that is probably just an error in
some ppt versions. The size attribute of the CurrentUserAtom "forgets"
about the optional Unicode user name, which then shows up as unexplained
data behind the record (where nothing should be).
oletools/ppt_record_parser.py
... ... @@ -214,6 +214,20 @@ class PptRecordCurrentUser(PptRecord):
def is_document_encrypted(self):
    """ Tell whether the header token has the "encrypted" value

    :returns: True if `self.header_token` equals 0xF3D1C4DF, False otherwise
    """
    encrypted_token = 0xF3D1C4DF
    return self.header_token == encrypted_token
216 216  
  217 + def read_some_more(self, stream):
  218 + """ check if unicode user name comes in stream after record
  219 +
  220 + Can safely do this since no data should come after this record.
  221 + """
  222 + more_data = stream.read(3*self.len_user_name) # limit data to read
  223 + if self.unicode_user_name is None and \
  224 + len(more_data) == 2*self.len_user_name:
  225 + self.unicode_user_name = more_data.decode('utf-16')
  226 + logging.debug('found unicode user name BEHIND current user atom')
  227 + else:
  228 + logging.warning('Unexplained data of size {0} in "Current User" '
  229 + 'stream'.format(len(data)))
  230 +
217 231  
218 232 # types of relevant records (there are much more than listed here)
219 233 RECORD_TYPES = dict([
... ...
oletools/record_base.py
... ... @@ -203,7 +203,12 @@ class OleRecordStream(object):
203 203 else:
204 204 self.stream.seek(rec_size, SEEK_CUR)
205 205 data = None
206   - yield rec_clz(rec_type, rec_size, other, pos, data)
  206 + rec_object = rec_clz(rec_type, rec_size, other, pos, data)
  207 +
  208 + # "We are microsoft, we do not have to adhere to our specifications"
  209 + rec_object.read_some_more(self.stream)
  210 + yield rec_object
  211 +
207 212  
208 213 def __str__(self):
209 214 return '[{0} {1} (type {2}, size {3})' \
... ... @@ -265,6 +270,21 @@ class OleRecordBase(object):
265 270 """
266 271 pass
267 272  
  273 + def read_some_more(self, stream):
  274 + """ Read some more data from stream after end of this record
  275 +
  276 + Found that for CurrentUserAtom in "Current User" stream of ppt files,
  277 + the last attribute (user name in unicode) is found *behind* the record
  278 + data. Thank you, Microsoft!
  279 +
  280 + Do this only if you are certain you will not mess up the following
  281 + records!
  282 +
  283 + This base implementation does nothing. For optional overwriting in
  284 + subclasses (like PptRecordUserAtom where no record should follow.)
  285 + """
  286 + return
  287 +
268 288 def _type_str(self):
269 289 """ helper for __str__, base implementation """
270 290 return '{0} type {1}'.format(self.__class__.__name__, self.type)
... ...