Commit dd5ee6df43c6ec547b428089ba4d5ea5e28f1ae4

Authored by Christian Herdtweck
1 parent 8ae664a2

continue with UserEditAtom

Showing 1 changed file with 119 additions and 31 deletions
oletools/ppt_parser.py
@@ -17,6 +17,7 @@ References: @@ -17,6 +17,7 @@ References:
17 #------------------------------------------------------------------------------ 17 #------------------------------------------------------------------------------
18 # TODO: 18 # TODO:
19 # - license 19 # - license
  20 +# - create an AtomBase class that defines check_value and parses RecordHeader?
20 # 21 #
21 # CHANGELOG: 22 # CHANGELOG:
22 # 2016-05-04 v0.01 CH: - start parsing "Current User" stream 23 # 2016-05-04 v0.01 CH: - start parsing "Current User" stream
@@ -61,6 +62,21 @@ class PptUnexpectedData(Exception): @@ -61,6 +62,21 @@ class PptUnexpectedData(Exception):
61 # === STRUCTS ================================================================= 62 # === STRUCTS =================================================================
62 63
63 64
  65 +def check_value(name, value, expected):
  66 + """ simplify verification of values in extract_from """
  67 + if isinstance(expected, (list, tuple)):
  68 + if value not in expected:
  69 + exp_str = '[' + ' OR '.join('{0:04X}'.format(val)
  70 + for val in expected) + ']'
  71 + raise PptUnexpectedData(
  72 + 'Current User', name,
  73 + '{0:04X}'.format(value), exp_str)
  74 + elif expected != value:
  75 + raise PptUnexpectedData(
  76 + 'Current User', name,
  77 + '{0:04X}'.format(value), '{0:04X}'.format(expected))
  78 +
  79 +
64 class RecordHeader(object): 80 class RecordHeader(object):
65 """ a record header, often found in ppt files 81 """ a record header, often found in ppt files
66 82
@@ -123,6 +139,9 @@ class CurrentUserAtom(object): @@ -123,6 +139,9 @@ class CurrentUserAtom(object):
123 self.unicode_user_name = None 139 self.unicode_user_name = None
124 self.rel_version = None 140 self.rel_version = None
125 141
  142 + def is_encrypted(self):
  143 + return self.header_token == self.HEADER_TOKEN_ENCRYPT
  144 +
126 @classmethod 145 @classmethod
127 def extract_from(clz, ole): 146 def extract_from(clz, ole):
128 """ extract info from olefile """ 147 """ extract info from olefile """
@@ -137,21 +156,19 @@ class CurrentUserAtom(object): @@ -137,21 +156,19 @@ class CurrentUserAtom(object):
137 156
138 # parse record header 157 # parse record header
139 obj.rec_head = RecordHeader.extract_from(stream) 158 obj.rec_head = RecordHeader.extract_from(stream)
140 - obj.check_value('rec_version', obj.rec_head.rec_ver, 0)  
141 - obj.check_value('rec_instance', obj.rec_head.rec_ver, 0)  
142 - obj.check_value('rec_instance', obj.rec_head.rec_type,  
143 - clz.RECORD_TYPE) 159 + check_value('rec_version', obj.rec_head.rec_ver, 0)
  160 + check_value('rec_instance', obj.rec_head.rec_ver, 0)
  161 + check_value('rec_type', obj.rec_head.rec_type, clz.RECORD_TYPE)
144 162
145 size, = struct.unpack('<L', stream.read(4)) 163 size, = struct.unpack('<L', stream.read(4))
146 - obj.check_value('size', size, obj.SIZE) 164 + check_value('size', size, obj.SIZE)
147 obj.header_token, = struct.unpack('<L', stream.read(4)) 165 obj.header_token, = struct.unpack('<L', stream.read(4))
148 - obj.check_value('headerToken', obj.header_token,  
149 - [clz.HEADER_TOKEN_ENCRYPT,  
150 - clz.HEADER_TOKEN_NOCRYPT]) 166 + check_value('headerToken', obj.header_token,
  167 + [clz.HEADER_TOKEN_ENCRYPT, clz.HEADER_TOKEN_NOCRYPT])
151 log.debug('headerToken is encrypt: {}' 168 log.debug('headerToken is encrypt: {}'
152 .format(obj.header_token == clz.HEADER_TOKEN_ENCRYPT)) 169 .format(obj.header_token == clz.HEADER_TOKEN_ENCRYPT))
153 obj.offset_to_current_edit, = struct.unpack('<L', stream.read(4)) 170 obj.offset_to_current_edit, = struct.unpack('<L', stream.read(4))
154 - log.debug('offsetToCurrentEdit: {0} ({0:04X})' 171 + log.debug('offsetToCurrentEdit: {0} (0x{0:04X})'
155 .format(obj.offset_to_current_edit)) 172 .format(obj.offset_to_current_edit))
156 obj.len_user_name, = struct.unpack('<H', stream.read(2)) 173 obj.len_user_name, = struct.unpack('<H', stream.read(2))
157 log.debug('lenUserName: {}'.format(obj.len_user_name)) 174 log.debug('lenUserName: {}'.format(obj.len_user_name))
@@ -160,22 +177,19 @@ class CurrentUserAtom(object): @@ -160,22 +177,19 @@ class CurrentUserAtom(object):
160 'Current User', 'CurrentUserAtom.lenUserName', 177 'Current User', 'CurrentUserAtom.lenUserName',
161 obj.len_user_name, '< 256') 178 obj.len_user_name, '< 256')
162 obj.doc_file_version, = struct.unpack('<H', stream.read(2)) 179 obj.doc_file_version, = struct.unpack('<H', stream.read(2))
163 - obj.check_value('docFileVersion', obj.doc_file_version,  
164 - clz.DOC_FILE_VERSION) 180 + check_value('docFileVersion', obj.doc_file_version,
  181 + clz.DOC_FILE_VERSION)
165 obj.major_version, = struct.unpack('<B', stream.read(1)) 182 obj.major_version, = struct.unpack('<B', stream.read(1))
166 - obj.check_value('majorVersion', obj.major_version,  
167 - clz.MAJOR_VERSION) 183 + check_value('majorVersion', obj.major_version, clz.MAJOR_VERSION)
168 obj.minor_version, = struct.unpack('<B', stream.read(1)) 184 obj.minor_version, = struct.unpack('<B', stream.read(1))
169 - obj.check_value('minorVersion', obj.minor_version,  
170 - clz.MINOR_VERSION) 185 + check_value('minorVersion', obj.minor_version, clz.MINOR_VERSION)
171 stream.read(2) # unused 186 stream.read(2) # unused
172 obj.ansi_user_name = stream.read(obj.len_user_name) 187 obj.ansi_user_name = stream.read(obj.len_user_name)
173 log.debug('ansiUserName: {!r}'.format(obj.ansi_user_name)) 188 log.debug('ansiUserName: {!r}'.format(obj.ansi_user_name))
174 obj.rel_version, = struct.unpack('<L', stream.read(4)) 189 obj.rel_version, = struct.unpack('<L', stream.read(4))
175 log.debug('relVersion: {0:04X}'.format(obj.rel_version)) 190 log.debug('relVersion: {0:04X}'.format(obj.rel_version))
176 - obj.check_value('relVersion', obj.rel_version,  
177 - [clz.REL_VERSION_CAN_USE,  
178 - clz.REL_VERSION_NO_USE]) 191 + check_value('relVersion', obj.rel_version,
  192 + [clz.REL_VERSION_CAN_USE, clz.REL_VERSION_NO_USE])
179 obj.unicode_user_name = stream.read(2 * obj.len_user_name) 193 obj.unicode_user_name = stream.read(2 * obj.len_user_name)
180 log.debug('unicodeUserName: {!r}'.format(obj.unicode_user_name)) 194 log.debug('unicodeUserName: {!r}'.format(obj.unicode_user_name))
181 195
@@ -188,19 +202,67 @@ class CurrentUserAtom(object): @@ -188,19 +202,67 @@ class CurrentUserAtom(object):
188 log.debug('closing stream') 202 log.debug('closing stream')
189 stream.close() 203 stream.close()
190 204
191 - def check_value(self, name, value, expected):  
192 - """ simplify verification of values in extract_from """  
193 - if isinstance(expected, (list, tuple)):  
194 - if value not in expected:  
195 - exp_str = '[' + ' OR '.join('{0:04X}'.format(val)  
196 - for val in expected) + ']'  
197 - raise PptUnexpectedData(  
198 - 'Current User', 'CurrentUserAtom.' + name,  
199 - '{0:04X}'.format(value), exp_str)  
200 - elif expected != value:  
201 - raise PptUnexpectedData(  
202 - 'Current User', 'CurrentUserAtom.' + name,  
203 - '{0:04X}'.format(value), '{0:04X}'.format(expected)) 205 +class UserEditAtom(object):
  206 + """ An atom record that specifies information about a user edit
  207 +
  208 + https://msdn.microsoft.com/en-us/library/dd945746%28v=office.12%29.aspx
  209 + """
  210 +
  211 + RECORD_TYPE = 0x0FF5
  212 + MINOR_VERSION = 0x00
  213 + MAJOR_VERSION = 0x03
  214 +
  215 + def __init__(self):
  216 + self.rec_head = None
  217 + self.last_slide_id_ref = None
  218 + self.version = None
  219 + self.minor_version = None
  220 + self.major_version = None
  221 + self.offset_last_edit = None
  222 + self.offset_persist_directory = None
  223 + self.doc_persist_id_ref = None
  224 + self.persist_id_seed = None
  225 + self.last_view = None
  226 + self.encrypt_session_persist_id_ref = None
  227 +
  228 + @classmethod
  229 + def extract_from(clz, stream, is_encrypted):
  230 + """ extract info from given stream (already positioned correctly!) """
  231 +
  232 + log.debug('extract UserEditAtom from stream')
  233 +
  234 + obj = clz()
  235 +
  236 + # parse record header
  237 + obj.rec_head = RecordHeader.extract_from(stream)
  238 + check_value('rec_version', obj.rec_head.rec_ver, 0)
  239 + check_value('rec_instance', obj.rec_head.rec_ver, 0)
  240 + check_value('rec_type', obj.rec_head.rec_type, clz.RECORD_TYPE)
  241 +
  242 + obj.last_slide_id_ref, = struct.unpack('<L', stream.read(4))
  243 + obj.version, = struct.unpack('<H', stream.read(2))
  244 + obj.minor_version, = struct.unpack('<B', stream.read(1))
  245 + check_value('minorVersion', obj.minor_version, clz.MINOR_VERSION)
  246 + obj.major_version, = struct.unpack('<B', stream.read(1))
  247 + check_value('majorVersion', obj.major_version, clz.MAJOR_VERSION)
  248 + obj.offset_last_edit, = struct.unpack('<L', stream.read(4))
  249 + log.debug('offsetLastEdit: {0} (0x{0:04X})'.format(obj.offset_last_edit))
  250 + # todo: check that this is before start pos / prev pos; 0x000 is end
  251 + obj.offset_persist_directory, = struct.unpack('<L', stream.read(4))
  252 + log.debug('offsetPersistDir: {0} (0x{0:04X})'
  253 + .format(obj.offset_persist_directory))
  254 + obj.doc_persist_id_ref, = struct.unpack('<L', stream.read(4))
  255 + check_value('docPersistIdRef', obj.doc_persist_id_ref, 1)
  256 + obj.persist_id_seed, = struct.unpack('<L', stream.read(4))
  257 + obj.last_view, = struct.unpack('<H', stream.read(2))
  258 + stream.read(2) # unused
  259 + if is_encrypted:
  260 + obj.encrypt_session_persist_id_ref, = struct.unpack('<L',
  261 + stream.read(4))
  262 + else:
  263 + obj.encrypt_session_persist_id_ref = None
  264 +
  265 + return obj
204 266
205 267
206 # === PptParser =============================================================== 268 # === PptParser ===============================================================
@@ -227,6 +289,8 @@ class PptParser(object): @@ -227,6 +289,8 @@ class PptParser(object):
227 289
228 self.fast_fail = fast_fail 290 self.fast_fail = fast_fail
229 291
  292 + self.current_user_atom = None
  293 +
230 # basic compatibility check: root directory structure is 294 # basic compatibility check: root directory structure is
231 # [['\x05DocumentSummaryInformation'], 295 # [['\x05DocumentSummaryInformation'],
232 # ['\x05SummaryInformation'], 296 # ['\x05SummaryInformation'],
@@ -282,6 +346,10 @@ class PptParser(object): @@ -282,6 +346,10 @@ class PptParser(object):
282 https://msdn.microsoft.com/en-us/library/dd948895%28v=office.12%29.aspx 346 https://msdn.microsoft.com/en-us/library/dd948895%28v=office.12%29.aspx
283 """ 347 """
284 348
  349 + if self.current_user_atom is not None:
  350 + log.warning('re-reading and overwriting '
  351 + 'previously read CurrentUserAtom')
  352 +
285 try: 353 try:
286 self.current_user_atom = CurrentUserAtom.extract_from(self.ole) 354 self.current_user_atom = CurrentUserAtom.extract_from(self.ole)
287 except Exception: 355 except Exception:
@@ -290,6 +358,25 @@ class PptParser(object): @@ -290,6 +358,25 @@ class PptParser(object):
290 else: 358 else:
291 self._log_exception() 359 self._log_exception()
292 360
  361 + def construct_persist_object_directory(self):
  362 + """ part 2: read the UserEditAtom at offsetToCurrentEdit in the 'PowerPoint Document' stream """
  363 +
  364 + if self.current_user_atom is None:
  365 + self.parse_current_user()
  366 +
  367 + offset = self.current_user_atom.offset_to_current_edit
  368 + is_encrypted = self.current_user_atom.is_encrypted()
  369 + stream = None
  370 +
  371 + try:
  372 + stream = self.ole.openstream('PowerPoint Document')
  373 + stream.seek(offset)
  374 + user_edit = UserEditAtom.extract_from(stream, is_encrypted)
  375 + finally:
  376 + if stream is not None:
  377 + log.debug('closing stream')
  378 + stream.close()
  379 +
293 # === TESTING ================================================================= 380 # === TESTING =================================================================
294 381
295 def test(): 382 def test():
@@ -305,6 +392,7 @@ def test(): @@ -305,6 +392,7 @@ def test():
305 # parse 392 # parse
306 ppt = PptParser(test_file) 393 ppt = PptParser(test_file)
307 ppt.parse_current_user() 394 ppt.parse_current_user()
  395 + ppt.construct_persist_object_directory()
308 396
309 397
310 if __name__ == '__main__': 398 if __name__ == '__main__':