Commit 87a69ade07f3bd276349eea7d32013b3b1240177
1 parent
62c927a8
made CurrentUserAtom a PptType -- streamlined last remaining class (currently)
Showing
1 changed file
with
112 additions
and
105 deletions
oletools/ppt_parser.py
| @@ -16,7 +16,6 @@ References: | @@ -16,7 +16,6 @@ References: | ||
| 16 | # TODO | 16 | # TODO |
| 17 | #------------------------------------------------------------------------------ | 17 | #------------------------------------------------------------------------------ |
| 18 | # TODO: | 18 | # TODO: |
| 19 | -# - make CurrentUserAtom and UserEditAtom PptTypes; adjust parse | ||
| 20 | # - make stream optional in PptUnexpectedData | 19 | # - make stream optional in PptUnexpectedData |
| 21 | # - can speed-up by using fewer, bigger struct.unpack calls? | 20 | # - can speed-up by using fewer, bigger struct.unpack calls? |
| 22 | # - license | 21 | # - license |
| @@ -125,97 +124,6 @@ class RecordHeader(object): | @@ -125,97 +124,6 @@ class RecordHeader(object): | ||
| 125 | return obj | 124 | return obj |
| 126 | 125 | ||
| 127 | 126 | ||
| 128 | -class CurrentUserAtom(object): | ||
| 129 | - """ An atom record that specifies information about the last user to modify | ||
| 130 | - the file and where the most recent user edit is located. This is the only | ||
| 131 | - record in the Current User Stream (section 2.1.1). | ||
| 132 | - | ||
| 133 | - https://msdn.microsoft.com/en-us/library/dd948895%28v=office.12%29.aspx | ||
| 134 | - """ | ||
| 135 | - | ||
| 136 | - # allowed values for header_token | ||
| 137 | - HEADER_TOKEN_ENCRYPT = 0xF3D1C4DF | ||
| 138 | - HEADER_TOKEN_NOCRYPT = 0xE391C05F | ||
| 139 | - | ||
| 140 | - # allowed values for rel_version | ||
| 141 | - REL_VERSION_CAN_USE = 0x00000008 | ||
| 142 | - REL_VERSION_NO_USE = 0x00000009 | ||
| 143 | - | ||
| 144 | - # required values | ||
| 145 | - RECORD_TYPE = 0x0FF6 | ||
| 146 | - SIZE = 0x14 | ||
| 147 | - DOC_FILE_VERSION = 0x03F4 | ||
| 148 | - MAJOR_VERSION = 0x03 | ||
| 149 | - MINOR_VERSION = 0x00 | ||
| 150 | - | ||
| 151 | - def __init__(self): | ||
| 152 | - self.rec_head = None | ||
| 153 | - self.size = None | ||
| 154 | - self.header_token = None | ||
| 155 | - self.offset_to_current_edit = None | ||
| 156 | - self.len_user_name = None | ||
| 157 | - self.doc_file_version = None | ||
| 158 | - self.major_version = None | ||
| 159 | - self.minor_version = None | ||
| 160 | - self.ansi_user_name = None | ||
| 161 | - self.unicode_user_name = None | ||
| 162 | - self.rel_version = None | ||
| 163 | - | ||
| 164 | - def is_encrypted(self): | ||
| 165 | - return self.header_token == self.HEADER_TOKEN_ENCRYPT | ||
| 166 | - | ||
| 167 | - @classmethod | ||
| 168 | - def extract_from(clz, ole): | ||
| 169 | - """ extract info from olefile """ | ||
| 170 | - | ||
| 171 | - log.debug('parsing "Current User"') | ||
| 172 | - | ||
| 173 | - stream = None | ||
| 174 | - try: | ||
| 175 | - # open stream | ||
| 176 | - log.debug('opening stream') | ||
| 177 | - stream = ole.openstream('Current User') | ||
| 178 | - obj = clz() | ||
| 179 | - | ||
| 180 | - # parse record header | ||
| 181 | - obj.rec_head = RecordHeader.extract_from(stream) | ||
| 182 | - check_value('rec_version', obj.rec_head.rec_ver, 0) | ||
| 183 | - check_value('rec_instance', obj.rec_head.rec_instance, 0) | ||
| 184 | - check_value('rec_type', obj.rec_head.rec_type, clz.RECORD_TYPE) | ||
| 185 | - | ||
| 186 | - size, = struct.unpack('<L', stream.read(4)) | ||
| 187 | - check_value('size', size, obj.SIZE) | ||
| 188 | - obj.header_token, = struct.unpack('<L', stream.read(4)) | ||
| 189 | - check_value('headerToken', obj.header_token, | ||
| 190 | - [clz.HEADER_TOKEN_ENCRYPT, clz.HEADER_TOKEN_NOCRYPT]) | ||
| 191 | - obj.offset_to_current_edit, = struct.unpack('<L', stream.read(4)) | ||
| 192 | - obj.len_user_name, = struct.unpack('<H', stream.read(2)) | ||
| 193 | - if obj.len_user_name > 255: | ||
| 194 | - raise PptUnexpectedData( | ||
| 195 | - 'Current User', 'CurrentUserAtom.lenUserName', | ||
| 196 | - obj.len_user_name, '< 256') | ||
| 197 | - obj.doc_file_version, = struct.unpack('<H', stream.read(2)) | ||
| 198 | - check_value('docFileVersion', obj.doc_file_version, | ||
| 199 | - clz.DOC_FILE_VERSION) | ||
| 200 | - obj.major_version, = struct.unpack('<B', stream.read(1)) | ||
| 201 | - check_value('majorVersion', obj.major_version, clz.MAJOR_VERSION) | ||
| 202 | - obj.minor_version, = struct.unpack('<B', stream.read(1)) | ||
| 203 | - check_value('minorVersion', obj.minor_version, clz.MINOR_VERSION) | ||
| 204 | - stream.read(2) # unused | ||
| 205 | - obj.ansi_user_name = stream.read(obj.len_user_name) | ||
| 206 | - obj.rel_version, = struct.unpack('<L', stream.read(4)) | ||
| 207 | - check_value('relVersion', obj.rel_version, | ||
| 208 | - [clz.REL_VERSION_CAN_USE, clz.REL_VERSION_NO_USE]) | ||
| 209 | - obj.unicode_user_name = stream.read(2 * obj.len_user_name) | ||
| 210 | - | ||
| 211 | - return obj | ||
| 212 | - | ||
| 213 | - finally: | ||
| 214 | - if stream is not None: | ||
| 215 | - log.debug('closing stream') | ||
| 216 | - stream.close() | ||
| 217 | - | ||
| 218 | - | ||
| 219 | class PptType(object): | 127 | class PptType(object): |
| 220 | """ base class of data types found in ppt ole files | 128 | """ base class of data types found in ppt ole files |
| 221 | 129 | ||
| @@ -321,6 +229,96 @@ class PptType(object): | @@ -321,6 +229,96 @@ class PptType(object): | ||
| 321 | return errs | 229 | return errs |
| 322 | 230 | ||
| 323 | 231 | ||
class CurrentUserAtom(PptType):
    """ An atom record that specifies information about the last user to modify
    the file and where the most recent user edit is located. This is the only
    record in the Current User Stream (section 2.1.1).

    https://msdn.microsoft.com/en-us/library/dd948895%28v=office.12%29.aspx
    """

    # allowed values for header_token
    HEADER_TOKEN_ENCRYPT = 0xF3D1C4DF
    HEADER_TOKEN_NOCRYPT = 0xE391C05F

    # allowed values for rel_version
    REL_VERSION_CAN_USE = 0x00000008
    REL_VERSION_NO_USE = 0x00000009

    # required values
    RECORD_TYPE = 0x0FF6
    SIZE = 0x14
    DOC_FILE_VERSION = 0x03F4
    MAJOR_VERSION = 0x03
    MINOR_VERSION = 0x00

    def __init__(self):
        """ create an empty atom; all fields are filled by extract_from """
        super(CurrentUserAtom, self).__init__(stream_name='Current User')
        self.rec_head = None
        self.size = None
        self.header_token = None
        self.offset_to_current_edit = None
        self.len_user_name = None
        self.doc_file_version = None
        self.major_version = None
        self.minor_version = None
        self.ansi_user_name = None
        self.unicode_user_name = None
        self.rel_version = None

    def is_encrypted(self):
        """ True if header_token equals HEADER_TOKEN_ENCRYPT """
        return self.header_token == self.HEADER_TOKEN_ENCRYPT

    @classmethod
    def extract_from(clz, stream):
        """ create instance with info from stream

        Reads the record header followed by the fixed-size fields and the two
        user name fields, in the order they are stored. No values are checked
        here; call check_validity on the result for that.

        The stream is neither opened nor closed here: the caller that opened
        it (PptParser) is responsible for closing it.

        :param stream: readable stream positioned at the start of the atom
        :returns: new instance with all fields set to the raw values read
        """
        obj = clz()

        # parse record header
        obj.rec_head = RecordHeader.extract_from(stream)

        # size is kept on the object so that check_validity can inspect it
        obj.size, = struct.unpack('<L', stream.read(4))
        obj.header_token, = struct.unpack('<L', stream.read(4))
        obj.offset_to_current_edit, = struct.unpack('<L', stream.read(4))
        obj.len_user_name, = struct.unpack('<H', stream.read(2))
        obj.doc_file_version, = struct.unpack('<H', stream.read(2))
        obj.major_version, = struct.unpack('<B', stream.read(1))
        obj.minor_version, = struct.unpack('<B', stream.read(1))
        stream.read(2)    # unused
        obj.ansi_user_name = stream.read(obj.len_user_name)
        obj.rel_version, = struct.unpack('<L', stream.read(4))
        # unicode name is read as twice as many bytes as the ansi name
        obj.unicode_user_name = stream.read(2 * obj.len_user_name)

        return obj

    def check_validity(self):
        """ compare the parsed fields with the values this class allows

        :returns: the list started by check_rec_head, extended with whatever
                  the check_value / check_range helpers return for each field
        """
        errs = self.check_rec_head()
        errs.extend(self.check_value('size', self.size, self.SIZE))
        errs.extend(self.check_value('headerToken', self.header_token,
                                     [self.HEADER_TOKEN_ENCRYPT,
                                      self.HEADER_TOKEN_NOCRYPT]))
        errs.extend(self.check_range('lenUserName', self.len_user_name, None,
                                     256))
        errs.extend(self.check_value('docFileVersion', self.doc_file_version,
                                     self.DOC_FILE_VERSION))
        errs.extend(self.check_value('majorVersion', self.major_version,
                                     self.MAJOR_VERSION))
        errs.extend(self.check_value('minorVersion', self.minor_version,
                                     self.MINOR_VERSION))
        errs.extend(self.check_value('relVersion', self.rel_version,
                                     [self.REL_VERSION_CAN_USE,
                                      self.REL_VERSION_NO_USE]))
        return errs
| 321 | + | ||
| 324 | class UserEditAtom(PptType): | 322 | class UserEditAtom(PptType): |
| 325 | """ An atom record that specifies information about a user edit | 323 | """ An atom record that specifies information about a user edit |
| 326 | 324 | ||
| @@ -667,56 +665,56 @@ class DocumentContainer(PptType): | @@ -667,56 +665,56 @@ class DocumentContainer(PptType): | ||
| 667 | # slideHF (variable): An optional SlideHeadersFootersContainer record | 665 | # slideHF (variable): An optional SlideHeadersFootersContainer record |
| 668 | # (section 2.4.15.1) that specifies the default header and footer | 666 | # (section 2.4.15.1) that specifies the default header and footer |
| 669 | # information for presentation slides. | 667 | # information for presentation slides. |
| 670 | - obj.slide_hf = None | 668 | + #obj.slide_hf = None |
| 671 | 669 | ||
| 672 | # notesHF (variable): An optional NotesHeadersFootersContainer record | 670 | # notesHF (variable): An optional NotesHeadersFootersContainer record |
| 673 | # (section 2.4.15.6) that specifies the default header and footer | 671 | # (section 2.4.15.6) that specifies the default header and footer |
| 674 | # information for notes slides. | 672 | # information for notes slides. |
| 675 | - obj.notes_hf = None | 673 | + #obj.notes_hf = None |
| 676 | 674 | ||
| 677 | # slideList (variable): An optional SlideListWithTextContainer record | 675 | # slideList (variable): An optional SlideListWithTextContainer record |
| 678 | # (section 2.4.14.3) that specifies the list of presentation slides. | 676 | # (section 2.4.14.3) that specifies the list of presentation slides. |
| 679 | - obj.slide_list = None | 677 | + #obj.slide_list = None |
| 680 | 678 | ||
| 681 | # notesList (variable): An optional NotesListWithTextContainer record | 679 | # notesList (variable): An optional NotesListWithTextContainer record |
| 682 | # (section 2.4.14.6) that specifies the list of notes slides. | 680 | # (section 2.4.14.6) that specifies the list of notes slides. |
| 683 | - obj.notes_list = None | 681 | + #obj.notes_list = None |
| 684 | 682 | ||
| 685 | # slideShowDocInfoAtom (88 bytes): An optional SlideShowDocInfoAtom | 683 | # slideShowDocInfoAtom (88 bytes): An optional SlideShowDocInfoAtom |
| 686 | # record (section 2.6.1) that specifies slide show information for the | 684 | # record (section 2.6.1) that specifies slide show information for the |
| 687 | # document. | 685 | # document. |
| 688 | - obj.slide_show_doc_info = None | 686 | + #obj.slide_show_doc_info = None |
| 689 | 687 | ||
| 690 | # namedShows (variable): An optional NamedShowsContainer record | 688 | # namedShows (variable): An optional NamedShowsContainer record |
| 691 | # (section 2.6.2) that specifies named shows in the document. | 689 | # (section 2.6.2) that specifies named shows in the document. |
| 692 | - obj.named_shows = None | 690 | + #obj.named_shows = None |
| 693 | 691 | ||
| 694 | # summary (variable): An optional SummaryContainer record (section | 692 | # summary (variable): An optional SummaryContainer record (section |
| 695 | # 2.4.22.3) that specifies bookmarks for the document. | 693 | # 2.4.22.3) that specifies bookmarks for the document. |
| 696 | - obj.summary = None | 694 | + #obj.summary = None |
| 697 | 695 | ||
| 698 | # docRoutingSlipAtom (variable): An optional DocRoutingSlipAtom record | 696 | # docRoutingSlipAtom (variable): An optional DocRoutingSlipAtom record |
| 699 | # (section 2.11.1) that specifies document routing information. | 697 | # (section 2.11.1) that specifies document routing information. |
| 700 | - obj.doc_routing_slip = None | 698 | + #obj.doc_routing_slip = None |
| 701 | 699 | ||
| 702 | # printOptionsAtom (13 bytes): An optional PrintOptionsAtom record | 700 | # printOptionsAtom (13 bytes): An optional PrintOptionsAtom record |
| 703 | # (section 2.4.12) that specifies default print options. | 701 | # (section 2.4.12) that specifies default print options. |
| 704 | - obj.print_options = None | 702 | + #obj.print_options = None |
| 705 | 703 | ||
| 706 | # rtCustomTableStylesAtom1 (variable): An optional | 704 | # rtCustomTableStylesAtom1 (variable): An optional |
| 707 | # RoundTripCustomTableStyles12Atom record (section 2.11.13) that | 705 | # RoundTripCustomTableStyles12Atom record (section 2.11.13) that |
| 708 | # specifies round-trip information for custom table styles. | 706 | # specifies round-trip information for custom table styles. |
| 709 | - obj.rt_custom_table_styles_1 = None | 707 | + #obj.rt_custom_table_styles_1 = None |
| 710 | 708 | ||
| 711 | # endDocumentAtom (8 bytes): An EndDocumentAtom record (section 2.4.13) | 709 | # endDocumentAtom (8 bytes): An EndDocumentAtom record (section 2.4.13) |
| 712 | # that specifies the end of the information for the document. | 710 | # that specifies the end of the information for the document. |
| 713 | - obj.end_document = None | 711 | + #obj.end_document = None |
| 714 | 712 | ||
| 715 | # rtCustomTableStylesAtom2 (variable): An optional | 713 | # rtCustomTableStylesAtom2 (variable): An optional |
| 716 | # RoundTripCustomTableStyles12Atom record that specifies round-trip | 714 | # RoundTripCustomTableStyles12Atom record that specifies round-trip |
| 717 | # information for custom table styles. It MUST NOT exist if | 715 | # information for custom table styles. It MUST NOT exist if |
| 718 | # rtCustomTableStylesAtom1 exists. | 716 | # rtCustomTableStylesAtom1 exists. |
| 719 | - obj.rt_custom_table_styles_2 = None | 717 | + #obj.rt_custom_table_styles_2 = None |
| 720 | 718 | ||
| 721 | return obj | 719 | return obj |
| 722 | 720 | ||
| @@ -897,13 +895,22 @@ class PptParser(object): | @@ -897,13 +895,22 @@ class PptParser(object): | ||
| 897 | log.warning('re-reading and overwriting ' | 895 | log.warning('re-reading and overwriting ' |
| 898 | 'previously read current_user_atom') | 896 | 'previously read current_user_atom') |
| 899 | 897 | ||
| 898 | + log.debug('parsing "Current User"') | ||
| 899 | + | ||
| 900 | + stream = None | ||
| 900 | try: | 901 | try: |
| 901 | - self.current_user_atom = CurrentUserAtom.extract_from(self.ole) | 902 | + log.debug('opening stream') |
| 903 | + stream = self.ole.openstream('Current User') | ||
| 904 | + self.current_user_atom = CurrentUserAtom.extract_from(stream) | ||
| 902 | except Exception: | 905 | except Exception: |
| 903 | if self.fast_fail: | 906 | if self.fast_fail: |
| 904 | raise | 907 | raise |
| 905 | else: | 908 | else: |
| 906 | self._log_exception() | 909 | self._log_exception() |
| 910 | + finally: | ||
| 911 | + if stream is not None: | ||
| 912 | + log.debug('closing stream') | ||
| 913 | + stream.close() | ||
| 907 | 914 | ||
| 908 | def parse_persist_object_directory(self): | 915 | def parse_persist_object_directory(self): |
| 909 | """ Part 1: Construct the persist object directory """ | 916 | """ Part 1: Construct the persist object directory """ |