Commit dd5ee6df43c6ec547b428089ba4d5ea5e28f1ae4
1 parent
8ae664a2
continue with UserEditAtom
Showing
1 changed file
with
119 additions
and
31 deletions
oletools/ppt_parser.py
| @@ -17,6 +17,7 @@ References: | @@ -17,6 +17,7 @@ References: | ||
| 17 | #------------------------------------------------------------------------------ | 17 | #------------------------------------------------------------------------------ |
| 18 | # TODO: | 18 | # TODO: |
| 19 | # - license | 19 | # - license |
| 20 | +# - create an AtomBase class that defines check_value and parses RecordHead? | |
| 20 | # | 21 | # |
| 21 | # CHANGELOG: | 22 | # CHANGELOG: |
| 22 | # 2016-05-04 v0.01 CH: - start parsing "Current User" stream | 23 | # 2016-05-04 v0.01 CH: - start parsing "Current User" stream |
| @@ -61,6 +62,21 @@ class PptUnexpectedData(Exception): | @@ -61,6 +62,21 @@ class PptUnexpectedData(Exception): | ||
| 61 | # === STRUCTS ================================================================= | 62 | # === STRUCTS ================================================================= |
| 62 | 63 | ||
| 63 | 64 | ||
def check_value(name, value, expected, stream_name='Current User'):
    """ simplify verification of values in extract_from

    Compares an integer field that was read from a stream with the value(s)
    it is allowed to have and raises if there is no match.

    :param name: name of the checked field; only used in the error
    :param value: integer that was read from the stream
    :param expected: either a single allowed integer or a list/tuple of
                     allowed integers
    :param stream_name: name of the stream the value came from; only used in
                        the error. Defaults to 'Current User', which is what
                        was always reported before this parameter existed.
    :raises PptUnexpectedData: if value is not (one of) expected
    """
    if isinstance(expected, (list, tuple)):
        if value in expected:
            return
        exp_str = '[' + ' OR '.join('{0:04X}'.format(val)
                                    for val in expected) + ']'
    elif expected == value:
        return
    else:
        exp_str = '{0:04X}'.format(expected)

    # single raise site so both branches report in exactly the same format
    raise PptUnexpectedData(stream_name, name,
                            '{0:04X}'.format(value), exp_str)
| 78 | + | ||
| 79 | + | ||
| 64 | class RecordHeader(object): | 80 | class RecordHeader(object): |
| 65 | """ a record header, often found in ppt files | 81 | """ a record header, often found in ppt files |
| 66 | 82 | ||
| @@ -123,6 +139,9 @@ class CurrentUserAtom(object): | @@ -123,6 +139,9 @@ class CurrentUserAtom(object): | ||
| 123 | self.unicode_user_name = None | 139 | self.unicode_user_name = None |
| 124 | self.rel_version = None | 140 | self.rel_version = None |
| 125 | 141 | ||
| 142 | + def is_encrypted(self): | ||
| 143 | + return self.header_token == self.HEADER_TOKEN_ENCRYPT | ||
| 144 | + | ||
| 126 | @classmethod | 145 | @classmethod |
| 127 | def extract_from(clz, ole): | 146 | def extract_from(clz, ole): |
| 128 | """ extract info from olefile """ | 147 | """ extract info from olefile """ |
| @@ -137,21 +156,19 @@ class CurrentUserAtom(object): | @@ -137,21 +156,19 @@ class CurrentUserAtom(object): | ||
| 137 | 156 | ||
| 138 | # parse record header | 157 | # parse record header |
| 139 | obj.rec_head = RecordHeader.extract_from(stream) | 158 | obj.rec_head = RecordHeader.extract_from(stream) |
| 140 | - obj.check_value('rec_version', obj.rec_head.rec_ver, 0) | ||
| 141 | - obj.check_value('rec_instance', obj.rec_head.rec_ver, 0) | ||
| 142 | - obj.check_value('rec_instance', obj.rec_head.rec_type, | ||
| 143 | - clz.RECORD_TYPE) | 159 | + check_value('rec_version', obj.rec_head.rec_ver, 0) |
| 160 | + check_value('rec_instance', obj.rec_head.rec_ver, 0) | ||
| 161 | + check_value('rec_type', obj.rec_head.rec_type, clz.RECORD_TYPE) | ||
| 144 | 162 | ||
| 145 | size, = struct.unpack('<L', stream.read(4)) | 163 | size, = struct.unpack('<L', stream.read(4)) |
| 146 | - obj.check_value('size', size, obj.SIZE) | 164 | + check_value('size', size, obj.SIZE) |
| 147 | obj.header_token, = struct.unpack('<L', stream.read(4)) | 165 | obj.header_token, = struct.unpack('<L', stream.read(4)) |
| 148 | - obj.check_value('headerToken', obj.header_token, | ||
| 149 | - [clz.HEADER_TOKEN_ENCRYPT, | ||
| 150 | - clz.HEADER_TOKEN_NOCRYPT]) | 166 | + check_value('headerToken', obj.header_token, |
| 167 | + [clz.HEADER_TOKEN_ENCRYPT, clz.HEADER_TOKEN_NOCRYPT]) | ||
| 151 | log.debug('headerToken is encrypt: {}' | 168 | log.debug('headerToken is encrypt: {}' |
| 152 | .format(obj.header_token == clz.HEADER_TOKEN_ENCRYPT)) | 169 | .format(obj.header_token == clz.HEADER_TOKEN_ENCRYPT)) |
| 153 | obj.offset_to_current_edit, = struct.unpack('<L', stream.read(4)) | 170 | obj.offset_to_current_edit, = struct.unpack('<L', stream.read(4)) |
| 154 | - log.debug('offsetToCurrentEdit: {0} ({0:04X})' | 171 | + log.debug('offsetToCurrentEdit: {0} (0x{0:04X})' |
| 155 | .format(obj.offset_to_current_edit)) | 172 | .format(obj.offset_to_current_edit)) |
| 156 | obj.len_user_name, = struct.unpack('<H', stream.read(2)) | 173 | obj.len_user_name, = struct.unpack('<H', stream.read(2)) |
| 157 | log.debug('lenUserName: {}'.format(obj.len_user_name)) | 174 | log.debug('lenUserName: {}'.format(obj.len_user_name)) |
| @@ -160,22 +177,19 @@ class CurrentUserAtom(object): | @@ -160,22 +177,19 @@ class CurrentUserAtom(object): | ||
| 160 | 'Current User', 'CurrentUserAtom.lenUserName', | 177 | 'Current User', 'CurrentUserAtom.lenUserName', |
| 161 | obj.len_user_name, '< 256') | 178 | obj.len_user_name, '< 256') |
| 162 | obj.doc_file_version, = struct.unpack('<H', stream.read(2)) | 179 | obj.doc_file_version, = struct.unpack('<H', stream.read(2)) |
| 163 | - obj.check_value('docFileVersion', obj.doc_file_version, | ||
| 164 | - clz.DOC_FILE_VERSION) | 180 | + check_value('docFileVersion', obj.doc_file_version, |
| 181 | + clz.DOC_FILE_VERSION) | ||
| 165 | obj.major_version, = struct.unpack('<B', stream.read(1)) | 182 | obj.major_version, = struct.unpack('<B', stream.read(1)) |
| 166 | - obj.check_value('majorVersion', obj.major_version, | ||
| 167 | - clz.MAJOR_VERSION) | 183 | + check_value('majorVersion', obj.major_version, clz.MAJOR_VERSION) |
| 168 | obj.minor_version, = struct.unpack('<B', stream.read(1)) | 184 | obj.minor_version, = struct.unpack('<B', stream.read(1)) |
| 169 | - obj.check_value('minorVersion', obj.minor_version, | ||
| 170 | - clz.MINOR_VERSION) | 185 | + check_value('minorVersion', obj.minor_version, clz.MINOR_VERSION) |
| 171 | stream.read(2) # unused | 186 | stream.read(2) # unused |
| 172 | obj.ansi_user_name = stream.read(obj.len_user_name) | 187 | obj.ansi_user_name = stream.read(obj.len_user_name) |
| 173 | log.debug('ansiUserName: {!r}'.format(obj.ansi_user_name)) | 188 | log.debug('ansiUserName: {!r}'.format(obj.ansi_user_name)) |
| 174 | obj.rel_version, = struct.unpack('<L', stream.read(4)) | 189 | obj.rel_version, = struct.unpack('<L', stream.read(4)) |
| 175 | log.debug('relVersion: {0:04X}'.format(obj.rel_version)) | 190 | log.debug('relVersion: {0:04X}'.format(obj.rel_version)) |
| 176 | - obj.check_value('relVersion', obj.rel_version, | ||
| 177 | - [clz.REL_VERSION_CAN_USE, | ||
| 178 | - clz.REL_VERSION_NO_USE]) | 191 | + check_value('relVersion', obj.rel_version, |
| 192 | + [clz.REL_VERSION_CAN_USE, clz.REL_VERSION_NO_USE]) | ||
| 179 | obj.unicode_user_name = stream.read(2 * obj.len_user_name) | 193 | obj.unicode_user_name = stream.read(2 * obj.len_user_name) |
| 180 | log.debug('unicodeUserName: {!r}'.format(obj.unicode_user_name)) | 194 | log.debug('unicodeUserName: {!r}'.format(obj.unicode_user_name)) |
| 181 | 195 | ||
| @@ -188,19 +202,67 @@ class CurrentUserAtom(object): | @@ -188,19 +202,67 @@ class CurrentUserAtom(object): | ||
| 188 | log.debug('closing stream') | 202 | log.debug('closing stream') |
| 189 | stream.close() | 203 | stream.close() |
| 190 | 204 | ||
| 191 | - def check_value(self, name, value, expected): | ||
| 192 | - """ simplify verification of values in extract_from """ | ||
| 193 | - if isinstance(expected, (list, tuple)): | ||
| 194 | - if value not in expected: | ||
| 195 | - exp_str = '[' + ' OR '.join('{0:04X}'.format(val) | ||
| 196 | - for val in expected) + ']' | ||
| 197 | - raise PptUnexpectedData( | ||
| 198 | - 'Current User', 'CurrentUserAtom.' + name, | ||
| 199 | - '{0:04X}'.format(value), exp_str) | ||
| 200 | - elif expected != value: | ||
| 201 | - raise PptUnexpectedData( | ||
| 202 | - 'Current User', 'CurrentUserAtom.' + name, | ||
class UserEditAtom(object):
    """ An atom record that specifies information about a user edit

    https://msdn.microsoft.com/en-us/library/dd945746%28v=office.12%29.aspx
    """

    # values the corresponding fields are checked against in extract_from
    RECORD_TYPE = 0x0FF5
    MINOR_VERSION = 0x00
    MAJOR_VERSION = 0x03

    def __init__(self):
        """ create an empty atom; all fields are filled by extract_from """
        self.rec_head = None
        self.last_slide_id_ref = None
        self.version = None
        self.minor_version = None
        self.major_version = None
        self.offset_last_edit = None
        self.offset_persist_directory = None
        self.doc_persist_id_ref = None
        self.persist_id_seed = None
        self.last_view = None
        self.encrypt_session_persist_id_ref = None

    @classmethod
    def extract_from(clz, stream, is_encrypted):
        """ extract info from given stream (already positioned correctly!)

        :param stream: readable stream whose position is at the first byte
                       of the atom's record header
        :param is_encrypted: if true, 4 more bytes are read at the end and
                             stored in encrypt_session_persist_id_ref;
                             otherwise that field stays None
        :returns: a new, filled instance of this class
        :raises PptUnexpectedData: (through check_value) if a checked field
                                   does not have its expected value
        """

        log.debug('extract UserEditAtom from stream')

        obj = clz()

        # parse record header
        obj.rec_head = RecordHeader.extract_from(stream)
        check_value('rec_version', obj.rec_head.rec_ver, 0)
        # this check used to be fed rec_ver a second time, so the instance
        # field was never looked at
        # NOTE(review): assumes RecordHeader stores that field as
        # rec_instance -- confirm against the RecordHeader class
        check_value('rec_instance', obj.rec_head.rec_instance, 0)
        check_value('rec_type', obj.rec_head.rec_type, clz.RECORD_TYPE)

        # all fields are little-endian and read in on-disk order
        obj.last_slide_id_ref, = struct.unpack('<L', stream.read(4))
        obj.version, = struct.unpack('<H', stream.read(2))
        obj.minor_version, = struct.unpack('<B', stream.read(1))
        check_value('minorVersion', obj.minor_version, clz.MINOR_VERSION)
        obj.major_version, = struct.unpack('<B', stream.read(1))
        check_value('majorVersion', obj.major_version, clz.MAJOR_VERSION)
        obj.offset_last_edit, = struct.unpack('<L', stream.read(4))
        log.debug('offsetLastEdit: {0} (0x{0:04X})'
                  .format(obj.offset_last_edit))
        # todo: check that this is before start pos / prev pos; 0x000 is end
        obj.offset_persist_directory, = struct.unpack('<L', stream.read(4))
        log.debug('offsetPersistDir: {0} (0x{0:04X})'
                  .format(obj.offset_persist_directory))
        obj.doc_persist_id_ref, = struct.unpack('<L', stream.read(4))
        check_value('docPersistIdRef', obj.doc_persist_id_ref, 1)
        obj.persist_id_seed, = struct.unpack('<L', stream.read(4))
        obj.last_view, = struct.unpack('<H', stream.read(2))
        stream.read(2)   # unused
        if is_encrypted:
            # trailing field is only read for encrypted documents;
            # __init__ already set it to None for the other case
            obj.encrypt_session_persist_id_ref, = struct.unpack(
                '<L', stream.read(4))

        return obj
| 204 | 266 | ||
| 205 | 267 | ||
| 206 | # === PptParser =============================================================== | 268 | # === PptParser =============================================================== |
| @@ -227,6 +289,8 @@ class PptParser(object): | @@ -227,6 +289,8 @@ class PptParser(object): | ||
| 227 | 289 | ||
| 228 | self.fast_fail = fast_fail | 290 | self.fast_fail = fast_fail |
| 229 | 291 | ||
| 292 | + self.current_user_atom = None | ||
| 293 | + | ||
| 230 | # basic compatibility check: root directory structure is | 294 | # basic compatibility check: root directory structure is |
| 231 | # [['\x05DocumentSummaryInformation'], | 295 | # [['\x05DocumentSummaryInformation'], |
| 232 | # ['\x05SummaryInformation'], | 296 | # ['\x05SummaryInformation'], |
| @@ -282,6 +346,10 @@ class PptParser(object): | @@ -282,6 +346,10 @@ class PptParser(object): | ||
| 282 | https://msdn.microsoft.com/en-us/library/dd948895%28v=office.12%29.aspx | 346 | https://msdn.microsoft.com/en-us/library/dd948895%28v=office.12%29.aspx |
| 283 | """ | 347 | """ |
| 284 | 348 | ||
| 349 | + if self.current_user_atom is not None: | ||
| 350 | + log.warning('re-reading and overwriting ' | ||
| 351 | + 'previously read CurrentUserAtom') | ||
| 352 | + | ||
| 285 | try: | 353 | try: |
| 286 | self.current_user_atom = CurrentUserAtom.extract_from(self.ole) | 354 | self.current_user_atom = CurrentUserAtom.extract_from(self.ole) |
| 287 | except Exception: | 355 | except Exception: |
| @@ -290,6 +358,25 @@ class PptParser(object): | @@ -290,6 +358,25 @@ class PptParser(object): | ||
| 290 | else: | 358 | else: |
| 291 | self._log_exception() | 359 | self._log_exception() |
| 292 | 360 | ||
| 361 | + def construct_persist_object_directory(self): | ||
| 362 | + """ part 2 """ | ||
| 363 | + | ||
| 364 | + if self.current_user_atom is None: | ||
| 365 | + self.parse_current_user() | ||
| 366 | + | ||
| 367 | + offset = self.current_user_atom.offset_to_current_edit | ||
| 368 | + is_encrypted = self.current_user_atom.is_encrypted() | ||
| 369 | + stream = None | ||
| 370 | + | ||
| 371 | + try: | ||
| 372 | + stream = self.ole.openstream('PowerPoint Document') | ||
| 373 | + stream.seek(offset) | ||
| 374 | + user_edit = UserEditAtom.extract_from(stream, is_encrypted) | ||
| 375 | + finally: | ||
| 376 | + if stream is not None: | ||
| 377 | + log.debug('closing stream') | ||
| 378 | + stream.close() | ||
| 379 | + | ||
| 293 | # === TESTING ================================================================= | 380 | # === TESTING ================================================================= |
| 294 | 381 | ||
| 295 | def test(): | 382 | def test(): |
| @@ -305,6 +392,7 @@ def test(): | @@ -305,6 +392,7 @@ def test(): | ||
| 305 | # parse | 392 | # parse |
| 306 | ppt = PptParser(test_file) | 393 | ppt = PptParser(test_file) |
| 307 | ppt.parse_current_user() | 394 | ppt.parse_current_user() |
| 395 | + ppt.construct_persist_object_directory() | ||
| 308 | 396 | ||
| 309 | 397 | ||
| 310 | if __name__ == '__main__': | 398 | if __name__ == '__main__': |