Commit 87a69ade07f3bd276349eea7d32013b3b1240177

Authored by Christian Herdtweck
1 parent 62c927a8

made CurrentUserAtom a PptType -- streamlined last remaining class (currently)

Showing 1 changed file with 112 additions and 105 deletions
oletools/ppt_parser.py
... ... @@ -16,7 +16,6 @@ References:
16 16 # TODO
17 17 #------------------------------------------------------------------------------
18 18 # TODO:
19   -# - make CurrentUserAtom and UserEditAtom PptTypes; adjust parse
20 19 # - make stream optional in PptUnexpectedData
21 20 # - can speed up by using fewer, larger struct.unpack calls?
22 21 # - license
... ... @@ -125,97 +124,6 @@ class RecordHeader(object):
125 124 return obj
126 125  
127 126  
128   -class CurrentUserAtom(object):
129   - """ An atom record that specifies information about the last user to modify
130   - the file and where the most recent user edit is located. This is the only
131   - record in the Current User Stream (section 2.1.1).
132   -
133   - https://msdn.microsoft.com/en-us/library/dd948895%28v=office.12%29.aspx
134   - """
135   -
136   - # allowed values for header_token
137   - HEADER_TOKEN_ENCRYPT = 0xF3D1C4DF
138   - HEADER_TOKEN_NOCRYPT = 0xE391C05F
139   -
140   - # allowed values for rel_version
141   - REL_VERSION_CAN_USE = 0x00000008
142   - REL_VERSION_NO_USE = 0x00000009
143   -
144   - # required values
145   - RECORD_TYPE = 0x0FF6
146   - SIZE = 0x14
147   - DOC_FILE_VERSION = 0x03F4
148   - MAJOR_VERSION = 0x03
149   - MINOR_VERSION = 0x00
150   -
151   - def __init__(self):
152   - self.rec_head = None
153   - self.size = None
154   - self.header_token = None
155   - self.offset_to_current_edit = None
156   - self.len_user_name = None
157   - self.doc_file_version = None
158   - self.major_version = None
159   - self.minor_version = None
160   - self.ansi_user_name = None
161   - self.unicode_user_name = None
162   - self.rel_version = None
163   -
164   - def is_encrypted(self):
165   - return self.header_token == self.HEADER_TOKEN_ENCRYPT
166   -
167   - @classmethod
168   - def extract_from(clz, ole):
169   - """ extract info from olefile """
170   -
171   - log.debug('parsing "Current User"')
172   -
173   - stream = None
174   - try:
175   - # open stream
176   - log.debug('opening stream')
177   - stream = ole.openstream('Current User')
178   - obj = clz()
179   -
180   - # parse record header
181   - obj.rec_head = RecordHeader.extract_from(stream)
182   - check_value('rec_version', obj.rec_head.rec_ver, 0)
183   - check_value('rec_instance', obj.rec_head.rec_instance, 0)
184   - check_value('rec_type', obj.rec_head.rec_type, clz.RECORD_TYPE)
185   -
186   - size, = struct.unpack('<L', stream.read(4))
187   - check_value('size', size, obj.SIZE)
188   - obj.header_token, = struct.unpack('<L', stream.read(4))
189   - check_value('headerToken', obj.header_token,
190   - [clz.HEADER_TOKEN_ENCRYPT, clz.HEADER_TOKEN_NOCRYPT])
191   - obj.offset_to_current_edit, = struct.unpack('<L', stream.read(4))
192   - obj.len_user_name, = struct.unpack('<H', stream.read(2))
193   - if obj.len_user_name > 255:
194   - raise PptUnexpectedData(
195   - 'Current User', 'CurrentUserAtom.lenUserName',
196   - obj.len_user_name, '< 256')
197   - obj.doc_file_version, = struct.unpack('<H', stream.read(2))
198   - check_value('docFileVersion', obj.doc_file_version,
199   - clz.DOC_FILE_VERSION)
200   - obj.major_version, = struct.unpack('<B', stream.read(1))
201   - check_value('majorVersion', obj.major_version, clz.MAJOR_VERSION)
202   - obj.minor_version, = struct.unpack('<B', stream.read(1))
203   - check_value('minorVersion', obj.minor_version, clz.MINOR_VERSION)
204   - stream.read(2) # unused
205   - obj.ansi_user_name = stream.read(obj.len_user_name)
206   - obj.rel_version, = struct.unpack('<L', stream.read(4))
207   - check_value('relVersion', obj.rel_version,
208   - [clz.REL_VERSION_CAN_USE, clz.REL_VERSION_NO_USE])
209   - obj.unicode_user_name = stream.read(2 * obj.len_user_name)
210   -
211   - return obj
212   -
213   - finally:
214   - if stream is not None:
215   - log.debug('closing stream')
216   - stream.close()
217   -
218   -
219 127 class PptType(object):
220 128 """ base class of data types found in ppt ole files
221 129  
... ... @@ -321,6 +229,96 @@ class PptType(object):
321 229 return errs
322 230  
323 231  
class CurrentUserAtom(PptType):
    """ An atom record that specifies information about the last user to modify
    the file and where the most recent user edit is located. This is the only
    record in the Current User Stream (section 2.1.1).

    https://msdn.microsoft.com/en-us/library/dd948895%28v=office.12%29.aspx

    Parsing and validation are separate steps: :py:meth:`extract_from` only
    reads the fields from a stream, :py:meth:`check_validity` compares the
    values read against the constants defined below.
    """

    # allowed values for header_token
    HEADER_TOKEN_ENCRYPT = 0xF3D1C4DF
    HEADER_TOKEN_NOCRYPT = 0xE391C05F

    # allowed values for rel_version
    REL_VERSION_CAN_USE = 0x00000008
    REL_VERSION_NO_USE = 0x00000009

    # required values
    # (RECORD_TYPE is not used in this class itself; presumably it is picked
    #  up by PptType.check_rec_head -- confirm against the base class)
    RECORD_TYPE = 0x0FF6
    SIZE = 0x14
    DOC_FILE_VERSION = 0x03F4
    MAJOR_VERSION = 0x03
    MINOR_VERSION = 0x00

    def __init__(self):
        """ create an empty atom; all fields are None until parsed """
        super(CurrentUserAtom, self).__init__(stream_name='Current User')
        self.rec_head = None                # RecordHeader
        self.size = None                    # 4 bytes, unsigned
        self.header_token = None            # 4 bytes, unsigned
        self.offset_to_current_edit = None  # 4 bytes, unsigned
        self.len_user_name = None           # 2 bytes, unsigned
        self.doc_file_version = None        # 2 bytes, unsigned
        self.major_version = None           # 1 byte, unsigned
        self.minor_version = None           # 1 byte, unsigned
        self.ansi_user_name = None          # raw bytes, len_user_name long
        self.unicode_user_name = None       # raw bytes, 2*len_user_name long
        self.rel_version = None             # 4 bytes, unsigned

    def is_encrypted(self):
        """ tell whether header_token equals HEADER_TOKEN_ENCRYPT

        :returns: True or False; False also if nothing was parsed yet
        """
        return self.header_token == self.HEADER_TOKEN_ENCRYPT

    @classmethod
    def extract_from(clz, stream):
        """ create instance with info from stream

        Reads the record header and then all fields (little-endian) in the
        order they appear in the record. Values are only stored, not checked;
        call :py:meth:`check_validity` on the result for that.

        The stream is neither opened nor closed here, that is up to the caller.

        :param stream: object with a read(n) method, positioned at the start of
                       the record
        :returns: new instance of this class
        :raises struct.error: if the stream ends before a fixed-size field was
                              read completely
        """
        obj = clz()

        # parse record header
        obj.rec_head = RecordHeader.extract_from(stream)

        # fixed-size fields; size is kept on the object so that
        # check_validity can look at it later
        obj.size, = struct.unpack('<L', stream.read(4))
        obj.header_token, = struct.unpack('<L', stream.read(4))
        obj.offset_to_current_edit, = struct.unpack('<L', stream.read(4))
        obj.len_user_name, = struct.unpack('<H', stream.read(2))
        obj.doc_file_version, = struct.unpack('<H', stream.read(2))
        obj.major_version, = struct.unpack('<B', stream.read(1))
        obj.minor_version, = struct.unpack('<B', stream.read(1))
        stream.read(2)    # unused

        # variable-size fields; user names are kept as raw bytes, not decoded
        obj.ansi_user_name = stream.read(obj.len_user_name)
        obj.rel_version, = struct.unpack('<L', stream.read(4))
        obj.unicode_user_name = stream.read(2 * obj.len_user_name)

        return obj

    def check_validity(self):
        """ compare the parsed fields with what is allowed for this record

        Runs the record header check of the base class and then one check per
        field that has required or allowed values.

        :returns: list with the combined results of all checks (what exactly
                  the entries are is determined by the check_* methods of
                  PptType; presumably empty if everything is as expected)
        """
        errs = self.check_rec_head()
        errs.extend(self.check_value('size', self.size, self.SIZE))
        errs.extend(self.check_value('headerToken', self.header_token,
                                     [self.HEADER_TOKEN_ENCRYPT,
                                      self.HEADER_TOKEN_NOCRYPT]))
        # no lower limit; 256 is presumably an exclusive upper limit
        # (i.e. at most 255 characters) -- confirm in PptType.check_range
        errs.extend(self.check_range('lenUserName', self.len_user_name, None,
                                     256))
        errs.extend(self.check_value('docFileVersion', self.doc_file_version,
                                     self.DOC_FILE_VERSION))
        errs.extend(self.check_value('majorVersion', self.major_version,
                                     self.MAJOR_VERSION))
        errs.extend(self.check_value('minorVersion', self.minor_version,
                                     self.MINOR_VERSION))
        errs.extend(self.check_value('relVersion', self.rel_version,
                                     [self.REL_VERSION_CAN_USE,
                                      self.REL_VERSION_NO_USE]))
        return errs
  320 +
  321 +
324 322 class UserEditAtom(PptType):
325 323 """ An atom record that specifies information about a user edit
326 324  
... ... @@ -667,56 +665,56 @@ class DocumentContainer(PptType):
667 665 # slideHF (variable): An optional SlideHeadersFootersContainer record
668 666 # (section 2.4.15.1) that specifies the default header and footer
669 667 # information for presentation slides.
670   - obj.slide_hf = None
  668 + #obj.slide_hf = None
671 669  
672 670 # notesHF (variable): An optional NotesHeadersFootersContainer record
673 671 # (section 2.4.15.6) that specifies the default header and footer
674 672 # information for notes slides.
675   - obj.notes_hf = None
  673 + #obj.notes_hf = None
676 674  
677 675 # slideList (variable): An optional SlideListWithTextContainer record
678 676 # (section 2.4.14.3) that specifies the list of presentation slides.
679   - obj.slide_list = None
  677 + #obj.slide_list = None
680 678  
681 679 # notesList (variable): An optional NotesListWithTextContainer record
682 680 # (section 2.4.14.6) that specifies the list of notes slides.
683   - obj.notes_list = None
  681 + #obj.notes_list = None
684 682  
685 683 # slideShowDocInfoAtom (88 bytes): An optional SlideShowDocInfoAtom
686 684 # record (section 2.6.1) that specifies slide show information for the
687 685 # document.
688   - obj.slide_show_doc_info = None
  686 + #obj.slide_show_doc_info = None
689 687  
690 688 # namedShows (variable): An optional NamedShowsContainer record
691 689 # (section 2.6.2) that specifies named shows in the document.
692   - obj.named_shows = None
  690 + #obj.named_shows = None
693 691  
694 692 # summary (variable): An optional SummaryContainer record (section
695 693 # 2.4.22.3) that specifies bookmarks for the document.
696   - obj.summary = None
  694 + #obj.summary = None
697 695  
698 696 # docRoutingSlipAtom (variable): An optional DocRoutingSlipAtom record
699 697 # (section 2.11.1) that specifies document routing information.
700   - obj.doc_routing_slip = None
  698 + #obj.doc_routing_slip = None
701 699  
702 700 # printOptionsAtom (13 bytes): An optional PrintOptionsAtom record
703 701 # (section 2.4.12) that specifies default print options.
704   - obj.print_options = None
  702 + #obj.print_options = None
705 703  
706 704 # rtCustomTableStylesAtom1 (variable): An optional
707 705 # RoundTripCustomTableStyles12Atom record (section 2.11.13) that
708 706 # specifies round-trip information for custom table styles.
709   - obj.rt_custom_table_styles_1 = None
  707 + #obj.rt_custom_table_styles_1 = None
710 708  
711 709 # endDocumentAtom (8 bytes): An EndDocumentAtom record (section 2.4.13)
712 710 # that specifies the end of the information for the document.
713   - obj.end_document = None
  711 + #obj.end_document = None
714 712  
715 713 # rtCustomTableStylesAtom2 (variable): An optional
716 714 # RoundTripCustomTableStyles12Atom record that specifies round-trip
717 715 # information for custom table styles. It MUST NOT exist if
718 716 # rtCustomTableStylesAtom1 exists.
719   - obj.rt_custom_table_styles_2 = None
  717 + #obj.rt_custom_table_styles_2 = None
720 718  
721 719 return obj
722 720  
... ... @@ -897,13 +895,22 @@ class PptParser(object):
897 895 log.warning('re-reading and overwriting '
898 896 'previously read current_user_atom')
899 897  
  898 + log.debug('parsing "Current User"')
  899 +
  900 + stream = None
900 901 try:
901   - self.current_user_atom = CurrentUserAtom.extract_from(self.ole)
  902 + log.debug('opening stream')
  903 + stream = self.ole.openstream('Current User')
  904 + self.current_user_atom = CurrentUserAtom.extract_from(stream)
902 905 except Exception:
903 906 if self.fast_fail:
904 907 raise
905 908 else:
906 909 self._log_exception()
  910 + finally:
  911 + if stream is not None:
  912 + log.debug('closing stream')
  913 + stream.close()
907 914  
908 915 def parse_persist_object_directory(self):
909 916 """ Part 1: Construct the persist object directory """
... ...