Commit 87a69ade07f3bd276349eea7d32013b3b1240177
1 parent: 62c927a8
made CurrentUserAtom a PptType -- streamlined last remaining class (currently)
Showing 1 changed file with 112 additions and 105 deletions
oletools/ppt_parser.py
| ... | ... | @@ -16,7 +16,6 @@ References: |
| 16 | 16 | # TODO |
| 17 | 17 | #------------------------------------------------------------------------------ |
| 18 | 18 | # TODO: |
| 19 | -# - make CurrentUserAtom and UserEditAtom PptTypes; adjust parse | |
| 20 | 19 | # - make stream optional in PptUnexpectedData |
| 21 | 20 | # - can speed up by using fewer, bigger struct.unpack calls? |
| 22 | 21 | # - license |
| ... | ... | @@ -125,97 +124,6 @@ class RecordHeader(object): |
| 125 | 124 | return obj |
| 126 | 125 | |
| 127 | 126 | |
| 128 | -class CurrentUserAtom(object): | |
| 129 | - """ An atom record that specifies information about the last user to modify | |
| 130 | - the file and where the most recent user edit is located. This is the only | |
| 131 | - record in the Current User Stream (section 2.1.1). | |
| 132 | - | |
| 133 | - https://msdn.microsoft.com/en-us/library/dd948895%28v=office.12%29.aspx | |
| 134 | - """ | |
| 135 | - | |
| 136 | - # allowed values for header_token | |
| 137 | - HEADER_TOKEN_ENCRYPT = 0xF3D1C4DF | |
| 138 | - HEADER_TOKEN_NOCRYPT = 0xE391C05F | |
| 139 | - | |
| 140 | - # allowed values for rel_version | |
| 141 | - REL_VERSION_CAN_USE = 0x00000008 | |
| 142 | - REL_VERSION_NO_USE = 0x00000009 | |
| 143 | - | |
| 144 | - # required values | |
| 145 | - RECORD_TYPE = 0x0FF6 | |
| 146 | - SIZE = 0x14 | |
| 147 | - DOC_FILE_VERSION = 0x03F4 | |
| 148 | - MAJOR_VERSION = 0x03 | |
| 149 | - MINOR_VERSION = 0x00 | |
| 150 | - | |
| 151 | - def __init__(self): | |
| 152 | - self.rec_head = None | |
| 153 | - self.size = None | |
| 154 | - self.header_token = None | |
| 155 | - self.offset_to_current_edit = None | |
| 156 | - self.len_user_name = None | |
| 157 | - self.doc_file_version = None | |
| 158 | - self.major_version = None | |
| 159 | - self.minor_version = None | |
| 160 | - self.ansi_user_name = None | |
| 161 | - self.unicode_user_name = None | |
| 162 | - self.rel_version = None | |
| 163 | - | |
| 164 | - def is_encrypted(self): | |
| 165 | - return self.header_token == self.HEADER_TOKEN_ENCRYPT | |
| 166 | - | |
| 167 | - @classmethod | |
| 168 | - def extract_from(clz, ole): | |
| 169 | - """ extract info from olefile """ | |
| 170 | - | |
| 171 | - log.debug('parsing "Current User"') | |
| 172 | - | |
| 173 | - stream = None | |
| 174 | - try: | |
| 175 | - # open stream | |
| 176 | - log.debug('opening stream') | |
| 177 | - stream = ole.openstream('Current User') | |
| 178 | - obj = clz() | |
| 179 | - | |
| 180 | - # parse record header | |
| 181 | - obj.rec_head = RecordHeader.extract_from(stream) | |
| 182 | - check_value('rec_version', obj.rec_head.rec_ver, 0) | |
| 183 | - check_value('rec_instance', obj.rec_head.rec_instance, 0) | |
| 184 | - check_value('rec_type', obj.rec_head.rec_type, clz.RECORD_TYPE) | |
| 185 | - | |
| 186 | - size, = struct.unpack('<L', stream.read(4)) | |
| 187 | - check_value('size', size, obj.SIZE) | |
| 188 | - obj.header_token, = struct.unpack('<L', stream.read(4)) | |
| 189 | - check_value('headerToken', obj.header_token, | |
| 190 | - [clz.HEADER_TOKEN_ENCRYPT, clz.HEADER_TOKEN_NOCRYPT]) | |
| 191 | - obj.offset_to_current_edit, = struct.unpack('<L', stream.read(4)) | |
| 192 | - obj.len_user_name, = struct.unpack('<H', stream.read(2)) | |
| 193 | - if obj.len_user_name > 255: | |
| 194 | - raise PptUnexpectedData( | |
| 195 | - 'Current User', 'CurrentUserAtom.lenUserName', | |
| 196 | - obj.len_user_name, '< 256') | |
| 197 | - obj.doc_file_version, = struct.unpack('<H', stream.read(2)) | |
| 198 | - check_value('docFileVersion', obj.doc_file_version, | |
| 199 | - clz.DOC_FILE_VERSION) | |
| 200 | - obj.major_version, = struct.unpack('<B', stream.read(1)) | |
| 201 | - check_value('majorVersion', obj.major_version, clz.MAJOR_VERSION) | |
| 202 | - obj.minor_version, = struct.unpack('<B', stream.read(1)) | |
| 203 | - check_value('minorVersion', obj.minor_version, clz.MINOR_VERSION) | |
| 204 | - stream.read(2) # unused | |
| 205 | - obj.ansi_user_name = stream.read(obj.len_user_name) | |
| 206 | - obj.rel_version, = struct.unpack('<L', stream.read(4)) | |
| 207 | - check_value('relVersion', obj.rel_version, | |
| 208 | - [clz.REL_VERSION_CAN_USE, clz.REL_VERSION_NO_USE]) | |
| 209 | - obj.unicode_user_name = stream.read(2 * obj.len_user_name) | |
| 210 | - | |
| 211 | - return obj | |
| 212 | - | |
| 213 | - finally: | |
| 214 | - if stream is not None: | |
| 215 | - log.debug('closing stream') | |
| 216 | - stream.close() | |
| 217 | - | |
| 218 | - | |
| 219 | 127 | class PptType(object): |
| 220 | 128 | """ base class of data types found in ppt ole files |
| 221 | 129 | |
| ... | ... | @@ -321,6 +229,96 @@ class PptType(object): |
| 321 | 229 | return errs |
| 322 | 230 | |
| 323 | 231 | |
| 232 | +class CurrentUserAtom(PptType): | |
| 233 | +    """ An atom record that specifies information about the last user to modify | |
| 234 | +    the file and where the most recent user edit is located. This is the only | |
| 235 | +    record in the Current User Stream (section 2.1.1). | |
| 236 | + | |
| 237 | +    https://msdn.microsoft.com/en-us/library/dd948895%28v=office.12%29.aspx | |
| 238 | +    """ | |
| 239 | + | |
| 240 | +    # allowed values for header_token | |
| 241 | +    HEADER_TOKEN_ENCRYPT = 0xF3D1C4DF | |
| 242 | +    HEADER_TOKEN_NOCRYPT = 0xE391C05F | |
| 243 | + | |
| 244 | +    # allowed values for rel_version | |
| 245 | +    REL_VERSION_CAN_USE = 0x00000008 | |
| 246 | +    REL_VERSION_NO_USE = 0x00000009 | |
| 247 | + | |
| 248 | +    # required values | |
| 249 | +    RECORD_TYPE = 0x0FF6 | |
| 250 | +    SIZE = 0x14 | |
| 251 | +    DOC_FILE_VERSION = 0x03F4 | |
| 252 | +    MAJOR_VERSION = 0x03 | |
| 253 | +    MINOR_VERSION = 0x00 | |
| 254 | + | |
| 255 | +    def __init__(self): | |
| 256 | +        super(CurrentUserAtom, self).__init__(stream_name='Current User') | |
| 257 | +        self.rec_head = None | |
| 258 | +        self.size = None | |
| 259 | +        self.header_token = None | |
| 260 | +        self.offset_to_current_edit = None | |
| 261 | +        self.len_user_name = None | |
| 262 | +        self.doc_file_version = None | |
| 263 | +        self.major_version = None | |
| 264 | +        self.minor_version = None | |
| 265 | +        self.ansi_user_name = None | |
| 266 | +        self.unicode_user_name = None | |
| 267 | +        self.rel_version = None | |
| 268 | + | |
| 269 | +    def is_encrypted(self): | |
| 270 | +        """ True if header_token equals HEADER_TOKEN_ENCRYPT """ | |
| 271 | +        return self.header_token == self.HEADER_TOKEN_ENCRYPT | |
| 272 | + | |
| 273 | +    @classmethod | |
| 274 | +    def extract_from(clz, stream): | |
| 275 | +        """ create instance with info from stream | |
| 276 | + | |
| 277 | +        The stream is only read, never closed; closing it is left to the | |
| 278 | +        caller that opened it. | |
| 279 | +        """ | |
| 280 | +        obj = clz() | |
| 281 | + | |
| 282 | +        # parse record header | |
| 283 | +        obj.rec_head = RecordHeader.extract_from(stream) | |
| 284 | + | |
| 285 | +        obj.size, = struct.unpack('<L', stream.read(4)) | |
| 286 | +        obj.header_token, = struct.unpack('<L', stream.read(4)) | |
| 287 | +        obj.offset_to_current_edit, = struct.unpack('<L', stream.read(4)) | |
| 288 | +        obj.len_user_name, = struct.unpack('<H', stream.read(2)) | |
| 289 | +        obj.doc_file_version, = struct.unpack('<H', stream.read(2)) | |
| 290 | +        obj.major_version, = struct.unpack('<B', stream.read(1)) | |
| 291 | +        obj.minor_version, = struct.unpack('<B', stream.read(1)) | |
| 292 | +        stream.read(2)  # unused | |
| 293 | +        obj.ansi_user_name = stream.read(obj.len_user_name) | |
| 294 | +        obj.rel_version, = struct.unpack('<L', stream.read(4)) | |
| 295 | +        obj.unicode_user_name = stream.read(2 * obj.len_user_name) | |
| 296 | + | |
| 297 | +        return obj | |
| 298 | + | |
| 299 | +    def check_validity(self): | |
| 300 | +        """ check the parsed fields against the values allowed for this atom; | |
| 301 | +        returns the errors collected from the check_* helper calls | |
| 302 | +        """ | |
| 303 | +        errs = self.check_rec_head() | |
| 304 | +        errs.extend(self.check_value('size', self.size, self.SIZE)) | |
| 305 | +        errs.extend(self.check_value('headerToken', self.header_token, | |
| 306 | +                                     [self.HEADER_TOKEN_ENCRYPT, | |
| 307 | +                                      self.HEADER_TOKEN_NOCRYPT])) | |
| 308 | +        errs.extend(self.check_range('lenUserName', self.len_user_name, None, | |
| 309 | +                                     256)) | |
| 310 | +        errs.extend(self.check_value('docFileVersion', self.doc_file_version, | |
| 311 | +                                     self.DOC_FILE_VERSION)) | |
| 312 | +        errs.extend(self.check_value('majorVersion', self.major_version, | |
| 313 | +                                     self.MAJOR_VERSION)) | |
| 314 | +        errs.extend(self.check_value('minorVersion', self.minor_version, | |
| 315 | +                                     self.MINOR_VERSION)) | |
| 316 | +        errs.extend(self.check_value('relVersion', self.rel_version, | |
| 317 | +                                     [self.REL_VERSION_CAN_USE, | |
| 318 | +                                      self.REL_VERSION_NO_USE])) | |
| 319 | +        return errs | |
| 320 | + | |
| 321 | + | |
| 324 | 322 | class UserEditAtom(PptType): |
| 325 | 323 | """ An atom record that specifies information about a user edit |
| 326 | 324 | |
| ... | ... | @@ -667,56 +665,56 @@ class DocumentContainer(PptType): |
| 667 | 665 | # slideHF (variable): An optional SlideHeadersFootersContainer record |
| 668 | 666 | # (section 2.4.15.1) that specifies the default header and footer |
| 669 | 667 | # information for presentation slides. |
| 670 | - obj.slide_hf = None | |
| 668 | + #obj.slide_hf = None | |
| 671 | 669 | |
| 672 | 670 | # notesHF (variable): An optional NotesHeadersFootersContainer record |
| 673 | 671 | # (section 2.4.15.6) that specifies the default header and footer |
| 674 | 672 | # information for notes slides. |
| 675 | - obj.notes_hf = None | |
| 673 | + #obj.notes_hf = None | |
| 676 | 674 | |
| 677 | 675 | # slideList (variable): An optional SlideListWithTextContainer record |
| 678 | 676 | # (section 2.4.14.3) that specifies the list of presentation slides. |
| 679 | - obj.slide_list = None | |
| 677 | + #obj.slide_list = None | |
| 680 | 678 | |
| 681 | 679 | # notesList (variable): An optional NotesListWithTextContainer record |
| 682 | 680 | # (section 2.4.14.6) that specifies the list of notes slides. |
| 683 | - obj.notes_list = None | |
| 681 | + #obj.notes_list = None | |
| 684 | 682 | |
| 685 | 683 | # slideShowDocInfoAtom (88 bytes): An optional SlideShowDocInfoAtom |
| 686 | 684 | # record (section 2.6.1) that specifies slide show information for the |
| 687 | 685 | # document. |
| 688 | - obj.slide_show_doc_info = None | |
| 686 | + #obj.slide_show_doc_info = None | |
| 689 | 687 | |
| 690 | 688 | # namedShows (variable): An optional NamedShowsContainer record |
| 691 | 689 | # (section 2.6.2) that specifies named shows in the document. |
| 692 | - obj.named_shows = None | |
| 690 | + #obj.named_shows = None | |
| 693 | 691 | |
| 694 | 692 | # summary (variable): An optional SummaryContainer record (section |
| 695 | 693 | # 2.4.22.3) that specifies bookmarks for the document. |
| 696 | - obj.summary = None | |
| 694 | + #obj.summary = None | |
| 697 | 695 | |
| 698 | 696 | # docRoutingSlipAtom (variable): An optional DocRoutingSlipAtom record |
| 699 | 697 | # (section 2.11.1) that specifies document routing information. |
| 700 | - obj.doc_routing_slip = None | |
| 698 | + #obj.doc_routing_slip = None | |
| 701 | 699 | |
| 702 | 700 | # printOptionsAtom (13 bytes): An optional PrintOptionsAtom record |
| 703 | 701 | # (section 2.4.12) that specifies default print options. |
| 704 | - obj.print_options = None | |
| 702 | + #obj.print_options = None | |
| 705 | 703 | |
| 706 | 704 | # rtCustomTableStylesAtom1 (variable): An optional |
| 707 | 705 | # RoundTripCustomTableStyles12Atom record (section 2.11.13) that |
| 708 | 706 | # specifies round-trip information for custom table styles. |
| 709 | - obj.rt_custom_table_styles_1 = None | |
| 707 | + #obj.rt_custom_table_styles_1 = None | |
| 710 | 708 | |
| 711 | 709 | # endDocumentAtom (8 bytes): An EndDocumentAtom record (section 2.4.13) |
| 712 | 710 | # that specifies the end of the information for the document. |
| 713 | - obj.end_document = None | |
| 711 | + #obj.end_document = None | |
| 714 | 712 | |
| 715 | 713 | # rtCustomTableStylesAtom2 (variable): An optional |
| 716 | 714 | # RoundTripCustomTableStyles12Atom record that specifies round-trip |
| 717 | 715 | # information for custom table styles. It MUST NOT exist if |
| 718 | 716 | # rtCustomTableStylesAtom1 exists. |
| 719 | - obj.rt_custom_table_styles_2 = None | |
| 717 | + #obj.rt_custom_table_styles_2 = None | |
| 720 | 718 | |
| 721 | 719 | return obj |
| 722 | 720 | |
| ... | ... | @@ -897,13 +895,22 @@ class PptParser(object): |
| 897 | 895 | log.warning('re-reading and overwriting ' |
| 898 | 896 | 'previously read current_user_atom') |
| 899 | 897 | |
| 898 | + log.debug('parsing "Current User"') | |
| 899 | + | |
| 900 | + stream = None | |
| 900 | 901 | try: |
| 901 | - self.current_user_atom = CurrentUserAtom.extract_from(self.ole) | |
| 902 | + log.debug('opening stream') | |
| 903 | + stream = self.ole.openstream('Current User') | |
| 904 | + self.current_user_atom = CurrentUserAtom.extract_from(stream) | |
| 902 | 905 | except Exception: |
| 903 | 906 | if self.fast_fail: |
| 904 | 907 | raise |
| 905 | 908 | else: |
| 906 | 909 | self._log_exception() |
| 910 | + finally: | |
| 911 | + if stream is not None: | |
| 912 | + log.debug('closing stream') | |
| 913 | + stream.close() | |
| 907 | 914 | |
| 908 | 915 | def parse_persist_object_directory(self): |
| 909 | 916 | """ Part 1: Construct the persist object directory """ | ... | ... |