Commit e90e0e5ad120b57419b7e06975fc458f48831b59

Authored by Christian Herdtweck
1 parent 470d0806

ppt records: compensate wrong size in CurrentUserAtom

This compensates for an inconsistency that is probably just a bug in
some PowerPoint versions: the size attribute of the CurrentUserAtom
"forgets" about the optional unicode user name, so that name shows up as
unexplained data behind the record (where nothing should be).
oletools/ppt_record_parser.py
@@ -214,6 +214,20 @@ class PptRecordCurrentUser(PptRecord): @@ -214,6 +214,20 @@ class PptRecordCurrentUser(PptRecord):
def is_document_encrypted(self):
    """ Tell whether the header token of this record signals encryption

    :returns: True if `header_token` equals 0xF3D1C4DF, False otherwise
    """
    encrypted_token = 0xF3D1C4DF
    return self.header_token == encrypted_token
216 216
  217 + def read_some_more(self, stream):
  218 + """ check if unicode user name comes in stream after record
  219 +
  220 + Can safely do this since no data should come after this record.
  221 + """
  222 + more_data = stream.read(3*self.len_user_name) # limit data to read
  223 + if self.unicode_user_name is None and \
  224 + len(more_data) == 2*self.len_user_name:
  225 + self.unicode_user_name = more_data.decode('utf-16')
  226 + logging.debug('found unicode user name BEHIND current user atom')
  227 + else:
  228 + logging.warning('Unexplained data of size {0} in "Current User" '
  229 + 'stream'.format(len(data)))
  230 +
217 231
218 # types of relevant records (there are much more than listed here) 232 # types of relevant records (there are much more than listed here)
219 RECORD_TYPES = dict([ 233 RECORD_TYPES = dict([
oletools/record_base.py
@@ -203,7 +203,12 @@ class OleRecordStream(object): @@ -203,7 +203,12 @@ class OleRecordStream(object):
203 else: 203 else:
204 self.stream.seek(rec_size, SEEK_CUR) 204 self.stream.seek(rec_size, SEEK_CUR)
205 data = None 205 data = None
206 - yield rec_clz(rec_type, rec_size, other, pos, data) 206 + rec_object = rec_clz(rec_type, rec_size, other, pos, data)
  207 +
  208 + # "We are microsoft, we do not have to adhere to our specifications"
  209 + rec_object.read_some_more(self.stream)
  210 + yield rec_object
  211 +
207 212
208 def __str__(self): 213 def __str__(self):
209 return '[{0} {1} (type {2}, size {3})' \ 214 return '[{0} {1} (type {2}, size {3})' \
@@ -265,6 +270,21 @@ class OleRecordBase(object): @@ -265,6 +270,21 @@ class OleRecordBase(object):
265 """ 270 """
266 pass 271 pass
267 272
  273 + def read_some_more(self, stream):
  274 + """ Read some more data from stream after end of this record
  275 +
  276 + Found that for CurrentUserAtom in "Current User" stream of ppt files,
  277 + the last attribute (user name in unicode) is found *behind* the record
  278 + data. Thank you, Microsoft!
  279 +
  280 + Do this only if you are certain you will not mess up the following
  281 + records!
  282 +
  283 + This base implementation does nothing. For optional overwriting in
  284 + subclasses (like PptRecordUserAtom where no record should follow.)
  285 + """
  286 + return
  287 +
268 def _type_str(self): 288 def _type_str(self):
269 """ helper for __str__, base implementation """ 289 """ helper for __str__, base implementation """
270 return '{0} type {1}'.format(self.__class__.__name__, self.type) 290 return '{0} type {1}'.format(self.__class__.__name__, self.type)