Commit 87a69ade07f3bd276349eea7d32013b3b1240177

Authored by Christian Herdtweck
1 parent 62c927a8

made CurrentUserAtom a PptType -- streamlined last remaining class (currently)

Showing 1 changed file with 112 additions and 105 deletions
oletools/ppt_parser.py
@@ -16,7 +16,6 @@ References: @@ -16,7 +16,6 @@ References:
16 # TODO 16 # TODO
17 #------------------------------------------------------------------------------ 17 #------------------------------------------------------------------------------
18 # TODO: 18 # TODO:
19 -# - make CurrentUserAtom and UserEditAtom PptTypes; adjust parse  
20 # - make stream optional in PptUnexpectedData 19 # - make stream optional in PptUnexpectedData
21 # - can speed up by using fewer, bigger struct.unpack calls? 20 # - can speed up by using fewer, bigger struct.unpack calls?
22 # - license 21 # - license
@@ -125,97 +124,6 @@ class RecordHeader(object): @@ -125,97 +124,6 @@ class RecordHeader(object):
125 return obj 124 return obj
126 125
127 126
128 -class CurrentUserAtom(object):  
129 - """ An atom record that specifies information about the last user to modify  
130 - the file and where the most recent user edit is located. This is the only  
131 - record in the Current User Stream (section 2.1.1).  
132 -  
133 - https://msdn.microsoft.com/en-us/library/dd948895%28v=office.12%29.aspx  
134 - """  
135 -  
136 - # allowed values for header_token  
137 - HEADER_TOKEN_ENCRYPT = 0xF3D1C4DF  
138 - HEADER_TOKEN_NOCRYPT = 0xE391C05F  
139 -  
140 - # allowed values for rel_version  
141 - REL_VERSION_CAN_USE = 0x00000008  
142 - REL_VERSION_NO_USE = 0x00000009  
143 -  
144 - # required values  
145 - RECORD_TYPE = 0x0FF6  
146 - SIZE = 0x14  
147 - DOC_FILE_VERSION = 0x03F4  
148 - MAJOR_VERSION = 0x03  
149 - MINOR_VERSION = 0x00  
150 -  
151 - def __init__(self):  
152 - self.rec_head = None  
153 - self.size = None  
154 - self.header_token = None  
155 - self.offset_to_current_edit = None  
156 - self.len_user_name = None  
157 - self.doc_file_version = None  
158 - self.major_version = None  
159 - self.minor_version = None  
160 - self.ansi_user_name = None  
161 - self.unicode_user_name = None  
162 - self.rel_version = None  
163 -  
164 - def is_encrypted(self):  
165 - return self.header_token == self.HEADER_TOKEN_ENCRYPT  
166 -  
167 - @classmethod  
168 - def extract_from(clz, ole):  
169 - """ extract info from olefile """  
170 -  
171 - log.debug('parsing "Current User"')  
172 -  
173 - stream = None  
174 - try:  
175 - # open stream  
176 - log.debug('opening stream')  
177 - stream = ole.openstream('Current User')  
178 - obj = clz()  
179 -  
180 - # parse record header  
181 - obj.rec_head = RecordHeader.extract_from(stream)  
182 - check_value('rec_version', obj.rec_head.rec_ver, 0)  
183 - check_value('rec_instance', obj.rec_head.rec_instance, 0)  
184 - check_value('rec_type', obj.rec_head.rec_type, clz.RECORD_TYPE)  
185 -  
186 - size, = struct.unpack('<L', stream.read(4))  
187 - check_value('size', size, obj.SIZE)  
188 - obj.header_token, = struct.unpack('<L', stream.read(4))  
189 - check_value('headerToken', obj.header_token,  
190 - [clz.HEADER_TOKEN_ENCRYPT, clz.HEADER_TOKEN_NOCRYPT])  
191 - obj.offset_to_current_edit, = struct.unpack('<L', stream.read(4))  
192 - obj.len_user_name, = struct.unpack('<H', stream.read(2))  
193 - if obj.len_user_name > 255:  
194 - raise PptUnexpectedData(  
195 - 'Current User', 'CurrentUserAtom.lenUserName',  
196 - obj.len_user_name, '< 256')  
197 - obj.doc_file_version, = struct.unpack('<H', stream.read(2))  
198 - check_value('docFileVersion', obj.doc_file_version,  
199 - clz.DOC_FILE_VERSION)  
200 - obj.major_version, = struct.unpack('<B', stream.read(1))  
201 - check_value('majorVersion', obj.major_version, clz.MAJOR_VERSION)  
202 - obj.minor_version, = struct.unpack('<B', stream.read(1))  
203 - check_value('minorVersion', obj.minor_version, clz.MINOR_VERSION)  
204 - stream.read(2) # unused  
205 - obj.ansi_user_name = stream.read(obj.len_user_name)  
206 - obj.rel_version, = struct.unpack('<L', stream.read(4))  
207 - check_value('relVersion', obj.rel_version,  
208 - [clz.REL_VERSION_CAN_USE, clz.REL_VERSION_NO_USE])  
209 - obj.unicode_user_name = stream.read(2 * obj.len_user_name)  
210 -  
211 - return obj  
212 -  
213 - finally:  
214 - if stream is not None:  
215 - log.debug('closing stream')  
216 - stream.close()  
217 -  
218 -  
219 class PptType(object): 127 class PptType(object):
220 """ base class of data types found in ppt ole files 128 """ base class of data types found in ppt ole files
221 129
@@ -321,6 +229,96 @@ class PptType(object): @@ -321,6 +229,96 @@ class PptType(object):
321 return errs 229 return errs
322 230
323 231
class CurrentUserAtom(PptType):
    """ An atom record that specifies information about the last user to modify
    the file and where the most recent user edit is located. This is the only
    record in the Current User Stream (section 2.1.1).

    Reading and validating are separate steps: extract_from only reads the
    fields from the stream, check_validity compares them with the values
    allowed by the constants below.

    https://msdn.microsoft.com/en-us/library/dd948895%28v=office.12%29.aspx
    """

    # allowed values for header_token
    HEADER_TOKEN_ENCRYPT = 0xF3D1C4DF
    HEADER_TOKEN_NOCRYPT = 0xE391C05F

    # allowed values for rel_version
    REL_VERSION_CAN_USE = 0x00000008
    REL_VERSION_NO_USE = 0x00000009

    # required values
    RECORD_TYPE = 0x0FF6
    SIZE = 0x14
    DOC_FILE_VERSION = 0x03F4
    MAJOR_VERSION = 0x03
    MINOR_VERSION = 0x00

    def __init__(self):
        """ create an empty atom; all fields are None until extract_from """
        super(CurrentUserAtom, self).__init__(stream_name='Current User')
        self.rec_head = None
        self.size = None
        self.header_token = None
        self.offset_to_current_edit = None
        self.len_user_name = None
        self.doc_file_version = None
        self.major_version = None
        self.minor_version = None
        self.ansi_user_name = None
        self.unicode_user_name = None
        self.rel_version = None

    def is_encrypted(self):
        """ return True if header_token equals HEADER_TOKEN_ENCRYPT """
        return self.header_token == self.HEADER_TOKEN_ENCRYPT

    @classmethod
    def extract_from(clz, stream):
        """ create instance with info from stream

        Only reads data, does not validate it; call check_validity on the
        result for that.

        The stream is neither opened nor closed here: whoever opened it keeps
        ownership and is responsible for closing it.

        :param stream: open, readable stream positioned at the start of the
                       atom (must offer a read(n) method)
        :returns: new instance with all fields set from the stream
        :raises struct.error: if the stream ends before all fields were read
        """

        obj = clz()

        # parse record header
        obj.rec_head = RecordHeader.extract_from(stream)

        # fixed-size part, all little-endian
        obj.size, = struct.unpack('<L', stream.read(4))
        obj.header_token, = struct.unpack('<L', stream.read(4))
        obj.offset_to_current_edit, = struct.unpack('<L', stream.read(4))
        obj.len_user_name, = struct.unpack('<H', stream.read(2))
        obj.doc_file_version, = struct.unpack('<H', stream.read(2))
        obj.major_version, = struct.unpack('<B', stream.read(1))
        obj.minor_version, = struct.unpack('<B', stream.read(1))
        stream.read(2)    # unused

        # variable-size part: user name is stored twice, first with
        # len_user_name bytes, then with twice as many bytes
        obj.ansi_user_name = stream.read(obj.len_user_name)
        obj.rel_version, = struct.unpack('<L', stream.read(4))
        obj.unicode_user_name = stream.read(2 * obj.len_user_name)

        return obj

    def check_validity(self):
        """ compare the parsed fields with the values this record may have

        Relies on check_rec_head, check_value and check_range of the base
        class; each of them is expected to return a list of problems found.

        :returns: list of all problems found (empty if the atom looks valid)
        """
        errs = self.check_rec_head()
        errs.extend(self.check_value('size', self.size, self.SIZE))
        errs.extend(self.check_value('headerToken', self.header_token,
                                     [self.HEADER_TOKEN_ENCRYPT,
                                      self.HEADER_TOKEN_NOCRYPT]))
        # no lower limit; presumably 256 is an exclusive upper limit (limit
        # semantics are defined by check_range of the base class)
        errs.extend(self.check_range('lenUserName', self.len_user_name, None,
                                     256))
        errs.extend(self.check_value('docFileVersion', self.doc_file_version,
                                     self.DOC_FILE_VERSION))
        errs.extend(self.check_value('majorVersion', self.major_version,
                                     self.MAJOR_VERSION))
        errs.extend(self.check_value('minorVersion', self.minor_version,
                                     self.MINOR_VERSION))
        errs.extend(self.check_value('relVersion', self.rel_version,
                                     [self.REL_VERSION_CAN_USE,
                                      self.REL_VERSION_NO_USE]))
        return errs
  320 +
  321 +
324 class UserEditAtom(PptType): 322 class UserEditAtom(PptType):
325 """ An atom record that specifies information about a user edit 323 """ An atom record that specifies information about a user edit
326 324
@@ -667,56 +665,56 @@ class DocumentContainer(PptType): @@ -667,56 +665,56 @@ class DocumentContainer(PptType):
667 # slideHF (variable): An optional SlideHeadersFootersContainer record 665 # slideHF (variable): An optional SlideHeadersFootersContainer record
668 # (section 2.4.15.1) that specifies the default header and footer 666 # (section 2.4.15.1) that specifies the default header and footer
669 # information for presentation slides. 667 # information for presentation slides.
670 - obj.slide_hf = None 668 + #obj.slide_hf = None
671 669
672 # notesHF (variable): An optional NotesHeadersFootersContainer record 670 # notesHF (variable): An optional NotesHeadersFootersContainer record
673 # (section 2.4.15.6) that specifies the default header and footer 671 # (section 2.4.15.6) that specifies the default header and footer
674 # information for notes slides. 672 # information for notes slides.
675 - obj.notes_hf = None 673 + #obj.notes_hf = None
676 674
677 # slideList (variable): An optional SlideListWithTextContainer record 675 # slideList (variable): An optional SlideListWithTextContainer record
678 # (section 2.4.14.3) that specifies the list of presentation slides. 676 # (section 2.4.14.3) that specifies the list of presentation slides.
679 - obj.slide_list = None 677 + #obj.slide_list = None
680 678
681 # notesList (variable): An optional NotesListWithTextContainer record 679 # notesList (variable): An optional NotesListWithTextContainer record
682 # (section 2.4.14.6) that specifies the list of notes slides. 680 # (section 2.4.14.6) that specifies the list of notes slides.
683 - obj.notes_list = None 681 + #obj.notes_list = None
684 682
685 # slideShowDocInfoAtom (88 bytes): An optional SlideShowDocInfoAtom 683 # slideShowDocInfoAtom (88 bytes): An optional SlideShowDocInfoAtom
686 # record (section 2.6.1) that specifies slide show information for the 684 # record (section 2.6.1) that specifies slide show information for the
687 # document. 685 # document.
688 - obj.slide_show_doc_info = None 686 + #obj.slide_show_doc_info = None
689 687
690 # namedShows (variable): An optional NamedShowsContainer record 688 # namedShows (variable): An optional NamedShowsContainer record
691 # (section 2.6.2) that specifies named shows in the document. 689 # (section 2.6.2) that specifies named shows in the document.
692 - obj.named_shows = None 690 + #obj.named_shows = None
693 691
694 # summary (variable): An optional SummaryContainer record (section 692 # summary (variable): An optional SummaryContainer record (section
695 # 2.4.22.3) that specifies bookmarks for the document. 693 # 2.4.22.3) that specifies bookmarks for the document.
696 - obj.summary = None 694 + #obj.summary = None
697 695
698 # docRoutingSlipAtom (variable): An optional DocRoutingSlipAtom record 696 # docRoutingSlipAtom (variable): An optional DocRoutingSlipAtom record
699 # (section 2.11.1) that specifies document routing information. 697 # (section 2.11.1) that specifies document routing information.
700 - obj.doc_routing_slip = None 698 + #obj.doc_routing_slip = None
701 699
702 # printOptionsAtom (13 bytes): An optional PrintOptionsAtom record 700 # printOptionsAtom (13 bytes): An optional PrintOptionsAtom record
703 # (section 2.4.12) that specifies default print options. 701 # (section 2.4.12) that specifies default print options.
704 - obj.print_options = None 702 + #obj.print_options = None
705 703
706 # rtCustomTableStylesAtom1 (variable): An optional 704 # rtCustomTableStylesAtom1 (variable): An optional
707 # RoundTripCustomTableStyles12Atom record (section 2.11.13) that 705 # RoundTripCustomTableStyles12Atom record (section 2.11.13) that
708 # specifies round-trip information for custom table styles. 706 # specifies round-trip information for custom table styles.
709 - obj.rt_custom_table_styles_1 = None 707 + #obj.rt_custom_table_styles_1 = None
710 708
711 # endDocumentAtom (8 bytes): An EndDocumentAtom record (section 2.4.13) 709 # endDocumentAtom (8 bytes): An EndDocumentAtom record (section 2.4.13)
712 # that specifies the end of the information for the document. 710 # that specifies the end of the information for the document.
713 - obj.end_document = None 711 + #obj.end_document = None
714 712
715 # rtCustomTableStylesAtom2 (variable): An optional 713 # rtCustomTableStylesAtom2 (variable): An optional
716 # RoundTripCustomTableStyles12Atom record that specifies round-trip 714 # RoundTripCustomTableStyles12Atom record that specifies round-trip
717 # information for custom table styles. It MUST NOT exist if 715 # information for custom table styles. It MUST NOT exist if
718 # rtCustomTableStylesAtom1 exists. 716 # rtCustomTableStylesAtom1 exists.
719 - obj.rt_custom_table_styles_2 = None 717 + #obj.rt_custom_table_styles_2 = None
720 718
721 return obj 719 return obj
722 720
@@ -897,13 +895,22 @@ class PptParser(object): @@ -897,13 +895,22 @@ class PptParser(object):
897 log.warning('re-reading and overwriting ' 895 log.warning('re-reading and overwriting '
898 'previously read current_user_atom') 896 'previously read current_user_atom')
899 897
  898 + log.debug('parsing "Current User"')
  899 +
  900 + stream = None
900 try: 901 try:
901 - self.current_user_atom = CurrentUserAtom.extract_from(self.ole) 902 + log.debug('opening stream')
  903 + stream = self.ole.openstream('Current User')
  904 + self.current_user_atom = CurrentUserAtom.extract_from(stream)
902 except Exception: 905 except Exception:
903 if self.fast_fail: 906 if self.fast_fail:
904 raise 907 raise
905 else: 908 else:
906 self._log_exception() 909 self._log_exception()
  910 + finally:
  911 + if stream is not None:
  912 + log.debug('closing stream')
  913 + stream.close()
907 914
908 def parse_persist_object_directory(self): 915 def parse_persist_object_directory(self):
909 """ Part 1: Construct the persist object directory """ 916 """ Part 1: Construct the persist object directory """