From e90e0e5ad120b57419b7e06975fc458f48831b59 Mon Sep 17 00:00:00 2001
From: Christian Herdtweck
Date: Fri, 1 Dec 2017 17:24:30 +0100
Subject: [PATCH] ppt records: compensate wrong size in CurrentUserAtom

---
 oletools/ppt_record_parser.py | 18 ++++++++++++++++++
 oletools/record_base.py       | 22 +++++++++++++++++++++-
 2 files changed, 39 insertions(+), 1 deletion(-)

diff --git a/oletools/ppt_record_parser.py b/oletools/ppt_record_parser.py
index 35dacca..7241f9a 100644
--- a/oletools/ppt_record_parser.py
+++ b/oletools/ppt_record_parser.py
@@ -214,6 +214,24 @@ class PptRecordCurrentUser(PptRecord):
     def is_document_encrypted(self):
         return self.header_token == 0xF3D1C4DF
 
+    def read_some_more(self, stream):
+        """ check if unicode user name comes in stream after record
+
+        Can safely do this since no data should come after this record.
+
+        Reads at most 3*len_user_name bytes from stream. If the unicode user
+        name is still unset and exactly 2*len_user_name bytes were found,
+        they are decoded as the user name; other non-empty data is logged.
+        """
+        more_data = stream.read(3*self.len_user_name)  # limit data to read
+        if self.unicode_user_name is None and \
+                len(more_data) == 2*self.len_user_name:
+            self.unicode_user_name = more_data.decode('utf-16')
+            logging.debug('found unicode user name BEHIND current user atom')
+        elif more_data:  # empty read means there is nothing to report
+            logging.warning('Unexplained data of size {0} in "Current User" '
+                            'stream'.format(len(more_data)))
+
 
 # types of relevant records (there are much more than listed here)
 RECORD_TYPES = dict([
diff --git a/oletools/record_base.py b/oletools/record_base.py
index 9f93cbc..76fcb5e 100644
--- a/oletools/record_base.py
+++ b/oletools/record_base.py
@@ -203,7 +203,12 @@ class OleRecordStream(object):
             else:
                 self.stream.seek(rec_size, SEEK_CUR)
                 data = None
-            yield rec_clz(rec_type, rec_size, other, pos, data)
+            rec_object = rec_clz(rec_type, rec_size, other, pos, data)
+
+            # "We are microsoft, we do not have to adhere to our specifications"
+            rec_object.read_some_more(self.stream)
+            yield rec_object
+
 
     def __str__(self):
         return '[{0} {1} (type {2}, size {3})' \
@@ -265,6 +270,21 @@ class OleRecordBase(object):
         """
         pass
 
+    def read_some_more(self, stream):
+        """ Read some more data from stream after end of this record
+
+        Found that for CurrentUserAtom in "Current User" stream of ppt files,
+        the last attribute (user name in unicode) is found *behind* the record
+        data. Thank you, Microsoft!
+
+        Do this only if you are certain you will not mess up the following
+        records!
+
+        This base implementation does nothing. For optional overriding in
+        subclasses (like PptRecordCurrentUser where no record should follow.)
+        """
+        return
+
     def _type_str(self):
         """ helper for __str__, base implementation """
         return '{0} type {1}'.format(self.__class__.__name__, self.type)
-- 
libgit2 0.21.4