Commit dd5ee6df43c6ec547b428089ba4d5ea5e28f1ae4
1 parent
8ae664a2
continue with UserEditAtom
Showing
1 changed file
with
119 additions
and
31 deletions
oletools/ppt_parser.py
| ... | ... | @@ -17,6 +17,7 @@ References: |
| 17 | 17 | #------------------------------------------------------------------------------ |
| 18 | 18 | # TODO: |
| 19 | 19 | # - license |
| 20 | +# - create an AtomBase class that defines check_value and parses RecordHeader? | |
| 20 | 21 | # |
| 21 | 22 | # CHANGELOG: |
| 22 | 23 | # 2016-05-04 v0.01 CH: - start parsing "Current User" stream |
| ... | ... | @@ -61,6 +62,21 @@ class PptUnexpectedData(Exception): |
| 61 | 62 | # === STRUCTS ================================================================= |
| 62 | 63 | |
| 63 | 64 | |
| 65 | +def check_value(name, value, expected): | |
| 66 | + """ verify a value parsed in extract_from: raise PptUnexpectedData unless it equals expected (or is one of expected, if that is a list or tuple) """ | |
| 67 | + if isinstance(expected, (list, tuple)): | |
| 68 | + if value not in expected: | |
| 69 | + exp_str = '[' + ' OR '.join('{0:04X}'.format(val) | |
| 70 | + for val in expected) + ']' | |
| 71 | + raise PptUnexpectedData( | |
| 72 | + 'Current User', name, | |
| 73 | + '{0:04X}'.format(value), exp_str) | |
| 74 | + elif expected != value: | |
| 75 | + raise PptUnexpectedData( | |
| 76 | + 'Current User', name, | |
| 77 | + '{0:04X}'.format(value), '{0:04X}'.format(expected)) | |
| 78 | + | |
| 79 | + | |
| 64 | 80 | class RecordHeader(object): |
| 65 | 81 | """ a record header, often found in ppt files |
| 66 | 82 | |
| ... | ... | @@ -123,6 +139,9 @@ class CurrentUserAtom(object): |
| 123 | 139 | self.unicode_user_name = None |
| 124 | 140 | self.rel_version = None |
| 125 | 141 | |
| 142 | + def is_encrypted(self): | |
| 143 | + return self.header_token == self.HEADER_TOKEN_ENCRYPT | |
| 144 | + | |
| 126 | 145 | @classmethod |
| 127 | 146 | def extract_from(clz, ole): |
| 128 | 147 | """ extract info from olefile """ |
| ... | ... | @@ -137,21 +156,19 @@ class CurrentUserAtom(object): |
| 137 | 156 | |
| 138 | 157 | # parse record header |
| 139 | 158 | obj.rec_head = RecordHeader.extract_from(stream) |
| 140 | - obj.check_value('rec_version', obj.rec_head.rec_ver, 0) | |
| 141 | - obj.check_value('rec_instance', obj.rec_head.rec_ver, 0) | |
| 142 | - obj.check_value('rec_instance', obj.rec_head.rec_type, | |
| 143 | - clz.RECORD_TYPE) | |
| 159 | + check_value('rec_version', obj.rec_head.rec_ver, 0) | |
| 160 | + check_value('rec_instance', obj.rec_head.rec_ver, 0) | |
| 161 | + check_value('rec_type', obj.rec_head.rec_type, clz.RECORD_TYPE) | |
| 144 | 162 | |
| 145 | 163 | size, = struct.unpack('<L', stream.read(4)) |
| 146 | - obj.check_value('size', size, obj.SIZE) | |
| 164 | + check_value('size', size, obj.SIZE) | |
| 147 | 165 | obj.header_token, = struct.unpack('<L', stream.read(4)) |
| 148 | - obj.check_value('headerToken', obj.header_token, | |
| 149 | - [clz.HEADER_TOKEN_ENCRYPT, | |
| 150 | - clz.HEADER_TOKEN_NOCRYPT]) | |
| 166 | + check_value('headerToken', obj.header_token, | |
| 167 | + [clz.HEADER_TOKEN_ENCRYPT, clz.HEADER_TOKEN_NOCRYPT]) | |
| 151 | 168 | log.debug('headerToken is encrypt: {}' |
| 152 | 169 | .format(obj.header_token == clz.HEADER_TOKEN_ENCRYPT)) |
| 153 | 170 | obj.offset_to_current_edit, = struct.unpack('<L', stream.read(4)) |
| 154 | - log.debug('offsetToCurrentEdit: {0} ({0:04X})' | |
| 171 | + log.debug('offsetToCurrentEdit: {0} (0x{0:04X})' | |
| 155 | 172 | .format(obj.offset_to_current_edit)) |
| 156 | 173 | obj.len_user_name, = struct.unpack('<H', stream.read(2)) |
| 157 | 174 | log.debug('lenUserName: {}'.format(obj.len_user_name)) |
| ... | ... | @@ -160,22 +177,19 @@ class CurrentUserAtom(object): |
| 160 | 177 | 'Current User', 'CurrentUserAtom.lenUserName', |
| 161 | 178 | obj.len_user_name, '< 256') |
| 162 | 179 | obj.doc_file_version, = struct.unpack('<H', stream.read(2)) |
| 163 | - obj.check_value('docFileVersion', obj.doc_file_version, | |
| 164 | - clz.DOC_FILE_VERSION) | |
| 180 | + check_value('docFileVersion', obj.doc_file_version, | |
| 181 | + clz.DOC_FILE_VERSION) | |
| 165 | 182 | obj.major_version, = struct.unpack('<B', stream.read(1)) |
| 166 | - obj.check_value('majorVersion', obj.major_version, | |
| 167 | - clz.MAJOR_VERSION) | |
| 183 | + check_value('majorVersion', obj.major_version, clz.MAJOR_VERSION) | |
| 168 | 184 | obj.minor_version, = struct.unpack('<B', stream.read(1)) |
| 169 | - obj.check_value('minorVersion', obj.minor_version, | |
| 170 | - clz.MINOR_VERSION) | |
| 185 | + check_value('minorVersion', obj.minor_version, clz.MINOR_VERSION) | |
| 171 | 186 | stream.read(2) # unused |
| 172 | 187 | obj.ansi_user_name = stream.read(obj.len_user_name) |
| 173 | 188 | log.debug('ansiUserName: {!r}'.format(obj.ansi_user_name)) |
| 174 | 189 | obj.rel_version, = struct.unpack('<L', stream.read(4)) |
| 175 | 190 | log.debug('relVersion: {0:04X}'.format(obj.rel_version)) |
| 176 | - obj.check_value('relVersion', obj.rel_version, | |
| 177 | - [clz.REL_VERSION_CAN_USE, | |
| 178 | - clz.REL_VERSION_NO_USE]) | |
| 191 | + check_value('relVersion', obj.rel_version, | |
| 192 | + [clz.REL_VERSION_CAN_USE, clz.REL_VERSION_NO_USE]) | |
| 179 | 193 | obj.unicode_user_name = stream.read(2 * obj.len_user_name) |
| 180 | 194 | log.debug('unicodeUserName: {!r}'.format(obj.unicode_user_name)) |
| 181 | 195 | |
| ... | ... | @@ -188,19 +202,67 @@ class CurrentUserAtom(object): |
| 188 | 202 | log.debug('closing stream') |
| 189 | 203 | stream.close() |
| 190 | 204 | |
| 191 | - def check_value(self, name, value, expected): | |
| 192 | - """ simplify verification of values in extract_from """ | |
| 193 | - if isinstance(expected, (list, tuple)): | |
| 194 | - if value not in expected: | |
| 195 | - exp_str = '[' + ' OR '.join('{0:04X}'.format(val) | |
| 196 | - for val in expected) + ']' | |
| 197 | - raise PptUnexpectedData( | |
| 198 | - 'Current User', 'CurrentUserAtom.' + name, | |
| 199 | - '{0:04X}'.format(value), exp_str) | |
| 200 | - elif expected != value: | |
| 201 | - raise PptUnexpectedData( | |
| 202 | - 'Current User', 'CurrentUserAtom.' + name, | |
| 203 | - '{0:04X}'.format(value), '{0:04X}'.format(expected)) | |
| 205 | +class UserEditAtom(object): | |
| 206 | + """ An atom record that specifies information about a user edit | |
| 207 | + | |
| 208 | + https://msdn.microsoft.com/en-us/library/dd945746%28v=office.12%29.aspx | |
| 209 | + """ | |
| 210 | + | |
| 211 | + RECORD_TYPE = 0x0FF5 | |
| 212 | + MINOR_VERSION = 0x00 | |
| 213 | + MAJOR_VERSION = 0x03 | |
| 214 | + | |
| 215 | + def __init__(self): | |
| 216 | + self.rec_head = None | |
| 217 | + self.last_slide_id_ref = None | |
| 218 | + self.version = None | |
| 219 | + self.minor_version = None | |
| 220 | + self.major_version = None | |
| 221 | + self.offset_last_edit = None | |
| 222 | + self.offset_persist_directory = None | |
| 223 | + self.doc_persist_id_ref = None | |
| 224 | + self.persist_id_seed = None | |
| 225 | + self.last_view = None | |
| 226 | + self.encrypt_session_persist_id_ref = None | |
| 227 | + | |
| 228 | + @classmethod | |
| 229 | + def extract_from(clz, stream, is_encrypted): | |
| 230 | + """ extract info from given stream (already positioned correctly!) """ | |
| 231 | + | |
| 232 | + log.debug('extract UserEditAtom from stream') | |
| 233 | + | |
| 234 | + obj = clz() | |
| 235 | + | |
| 236 | + # parse record header | |
| 237 | + obj.rec_head = RecordHeader.extract_from(stream) | |
| 238 | + check_value('rec_version', obj.rec_head.rec_ver, 0) | |
| 239 | + check_value('rec_instance', obj.rec_head.rec_ver, 0) | |
| 240 | + check_value('rec_type', obj.rec_head.rec_type, clz.RECORD_TYPE) | |
| 241 | + | |
| 242 | + obj.last_slide_id_ref, = struct.unpack('<L', stream.read(4)) | |
| 243 | + obj.version, = struct.unpack('<H', stream.read(2)) | |
| 244 | + obj.minor_version, = struct.unpack('<B', stream.read(1)) | |
| 245 | + check_value('minorVersion', obj.minor_version, clz.MINOR_VERSION) | |
| 246 | + obj.major_version, = struct.unpack('<B', stream.read(1)) | |
| 247 | + check_value('majorVersion', obj.major_version, clz.MAJOR_VERSION) | |
| 248 | + obj.offset_last_edit, = struct.unpack('<L', stream.read(4)) | |
| 249 | + log.debug('offsetLastEdit: {0} (0x{0:04X})'.format(obj.offset_last_edit)) | |
| 250 | + # todo: check that this offset lies before the start position of this atom / the previously read one; 0x00000000 marks the end (no earlier edit) | |
| 251 | + obj.offset_persist_directory, = struct.unpack('<L', stream.read(4)) | |
| 252 | + log.debug('offsetPersistDir: {0} (0x{0:04X})' | |
| 253 | + .format(obj.offset_persist_directory)) | |
| 254 | + obj.doc_persist_id_ref, = struct.unpack('<L', stream.read(4)) | |
| 255 | + check_value('docPersistIdRef', obj.doc_persist_id_ref, 1) | |
| 256 | + obj.persist_id_seed, = struct.unpack('<L', stream.read(4)) | |
| 257 | + obj.last_view, = struct.unpack('<H', stream.read(2)) | |
| 258 | + stream.read(2) # unused | |
| 259 | + if is_encrypted: | |
| 260 | + obj.encrypt_session_persist_id_ref, = struct.unpack('<L', | |
| 261 | + stream.read(4)) | |
| 262 | + else: | |
| 263 | + obj.encrypt_session_persist_id_ref = None | |
| 264 | + | |
| 265 | + return obj | |
| 204 | 266 | |
| 205 | 267 | |
| 206 | 268 | # === PptParser =============================================================== |
| ... | ... | @@ -227,6 +289,8 @@ class PptParser(object): |
| 227 | 289 | |
| 228 | 290 | self.fast_fail = fast_fail |
| 229 | 291 | |
| 292 | + self.current_user_atom = None | |
| 293 | + | |
| 230 | 294 | # basic compatibility check: root directory structure is |
| 231 | 295 | # [['\x05DocumentSummaryInformation'], |
| 232 | 296 | # ['\x05SummaryInformation'], |
| ... | ... | @@ -282,6 +346,10 @@ class PptParser(object): |
| 282 | 346 | https://msdn.microsoft.com/en-us/library/dd948895%28v=office.12%29.aspx |
| 283 | 347 | """ |
| 284 | 348 | |
| 349 | + if self.current_user_atom is not None: | |
| 350 | + log.warning('re-reading and overwriting ' | |
| 351 | + 'previously read CurrentUserAtom') | |
| 352 | + | |
| 285 | 353 | try: |
| 286 | 354 | self.current_user_atom = CurrentUserAtom.extract_from(self.ole) |
| 287 | 355 | except Exception: |
| ... | ... | @@ -290,6 +358,25 @@ class PptParser(object): |
| 290 | 358 | else: |
| 291 | 359 | self._log_exception() |
| 292 | 360 | |
| 361 | + def construct_persist_object_directory(self): | |
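| 362 | + """ part 2: read the UserEditAtom at CurrentUserAtom.offsetToCurrentEdit from the 'PowerPoint Document' stream """ | |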
| 363 | + | |
| 364 | + if self.current_user_atom is None: | |
| 365 | + self.parse_current_user() | |
| 366 | + | |
| 367 | + offset = self.current_user_atom.offset_to_current_edit | |
| 368 | + is_encrypted = self.current_user_atom.is_encrypted() | |
| 369 | + stream = None | |
| 370 | + | |
| 371 | + try: | |
| 372 | + stream = self.ole.openstream('PowerPoint Document') | |
| 373 | + stream.seek(offset) | |
| 374 | + user_edit = UserEditAtom.extract_from(stream, is_encrypted) | |
| 375 | + finally: | |
| 376 | + if stream is not None: | |
| 377 | + log.debug('closing stream') | |
| 378 | + stream.close() | |
| 379 | + | |
| 293 | 380 | # === TESTING ================================================================= |
| 294 | 381 | |
| 295 | 382 | def test(): |
| ... | ... | @@ -305,6 +392,7 @@ def test(): |
| 305 | 392 | # parse |
| 306 | 393 | ppt = PptParser(test_file) |
| 307 | 394 | ppt.parse_current_user() |
| 395 | + ppt.construct_persist_object_directory() | |
| 308 | 396 | |
| 309 | 397 | |
| 310 | 398 | if __name__ == '__main__': | ... | ... |