Commit 27e0a1c88ce0c6da147fb6f9b6224e0e9429bb10

Authored by Christian Herdtweck
1 parent 87a69ade

bugfixing but failed to correctly parse DocumentContainer :-(

Showing 1 changed file with 185 additions and 47 deletions
oletools/ppt_parser.py
@@ -121,6 +121,9 @@ class RecordHeader(object): @@ -121,6 +121,9 @@ class RecordHeader(object):
121 obj.rec_instance, obj.rec_ver = divmod(version_instance, 2**4) 121 obj.rec_instance, obj.rec_ver = divmod(version_instance, 2**4)
122 obj.rec_type, = struct.unpack('<H', stream.read(2)) 122 obj.rec_type, = struct.unpack('<H', stream.read(2))
123 obj.rec_len, = struct.unpack('<L', stream.read(4)) 123 obj.rec_len, = struct.unpack('<L', stream.read(4))
  124 + log.debug('type is {0:04X}, instance {1:04X}, version {2:04X}, len {3}'
  125 + .format(obj.rec_type, obj.rec_instance, obj.rec_ver,
  126 + obj.rec_len))
124 return obj 127 return obj
125 128
126 129
@@ -273,35 +276,28 @@ class CurrentUserAtom(PptType): @@ -273,35 +276,28 @@ class CurrentUserAtom(PptType):
273 def extract_from(clz, stream): 276 def extract_from(clz, stream):
274 """ create instance with info from stream """ 277 """ create instance with info from stream """
275 278
276 - stream = None  
277 - try:  
278 - obj = clz()  
279 -  
280 - # parse record header  
281 - obj.rec_head = RecordHeader.extract_from(stream) 279 + obj = clz()
282 280
283 - size, = struct.unpack('<L', stream.read(4))  
284 - obj.header_token, = struct.unpack('<L', stream.read(4))  
285 - obj.offset_to_current_edit, = struct.unpack('<L', stream.read(4))  
286 - obj.len_user_name, = struct.unpack('<H', stream.read(2))  
287 - obj.doc_file_version, = struct.unpack('<H', stream.read(2))  
288 - obj.major_version, = struct.unpack('<B', stream.read(1))  
289 - obj.minor_version, = struct.unpack('<B', stream.read(1))  
290 - stream.read(2) # unused  
291 - obj.ansi_user_name = stream.read(obj.len_user_name)  
292 - obj.rel_version, = struct.unpack('<L', stream.read(4))  
293 - obj.unicode_user_name = stream.read(2 * obj.len_user_name) 281 + # parse record header
  282 + obj.rec_head = RecordHeader.extract_from(stream)
294 283
295 - return obj 284 + obj.size, = struct.unpack('<L', stream.read(4))
  285 + obj.header_token, = struct.unpack('<L', stream.read(4))
  286 + obj.offset_to_current_edit, = struct.unpack('<L', stream.read(4))
  287 + obj.len_user_name, = struct.unpack('<H', stream.read(2))
  288 + obj.doc_file_version, = struct.unpack('<H', stream.read(2))
  289 + obj.major_version, = struct.unpack('<B', stream.read(1))
  290 + obj.minor_version, = struct.unpack('<B', stream.read(1))
  291 + stream.read(2) # unused
  292 + obj.ansi_user_name = stream.read(obj.len_user_name)
  293 + obj.rel_version, = struct.unpack('<L', stream.read(4))
  294 + obj.unicode_user_name = stream.read(2 * obj.len_user_name)
296 295
297 - finally:  
298 - if stream is not None:  
299 - log.debug('closing stream')  
300 - stream.close() 296 + return obj
301 297
302 def check_validity(self): 298 def check_validity(self):
303 errs = self.check_rec_head() 299 errs = self.check_rec_head()
304 - errs.extend(self.check_value('size', size, self.SIZE) 300 + errs.extend(self.check_value('size', self.size, self.SIZE))
305 errs.extend(self.check_value('headerToken', self.header_token, 301 errs.extend(self.check_value('headerToken', self.header_token,
306 [clz.HEADER_TOKEN_ENCRYPT, 302 [clz.HEADER_TOKEN_ENCRYPT,
307 clz.HEADER_TOKEN_NOCRYPT])) 303 clz.HEADER_TOKEN_NOCRYPT]))
@@ -567,6 +563,52 @@ class PersistDirectoryEntry(object): @@ -567,6 +563,52 @@ class PersistDirectoryEntry(object):
567 return errs 563 return errs
568 564
569 565
class DocInfoListSubContainerOrAtom(PptType):
    """ one of various types found in a DocInfoListContainer

    https://msdn.microsoft.com/en-us/library/dd921705%28v=office.12%29.aspx

    The actual type of this object is determined by the recType field of its
    RecordHeader (the code below dispatches on rec_type).

    Similar to DummyType, RECORD_TYPE varies from instance to instance for
    this type.
    """

    # RECORD_TYPE varies, is specified only in extract_from
    VALID_RECORD_TYPES = [0x1388,  # RECORD_TYPE_PROG_TAGS
                          0x0414,  # RECORD_TYPE_NORMAL_VIEW_SET_INFO_9
                          0x0413,  # RECORD_TYPE_NOTES_TEXT_VIEW_INFO_9
                          0x0407,  # RECORD_TYPE_OUTLINE_VIEW_INFO
                          0x03FA,  # RECORD_TYPE_SLIDE_VIEW_INFO
                          0x0408]  # RECORD_TYPE_SORTER_VIEW_INFO

    def __init__(self):
        super(DocInfoListSubContainerOrAtom, self).__init__()

    @classmethod
    def extract_from(clz, stream):
        """ build instance with info read from stream

        If the record header identifies a VBAInfoContainer, parse that type
        fully (handing over the already-consumed header); otherwise skip the
        record's payload and return a generic instance.
        """

        log.debug('Parsing DocInfoListSubContainerOrAtom from stream')

        obj = clz()
        obj.read_rec_head(stream)
        if obj.rec_head.rec_type == VBAInfoContainer.RECORD_TYPE:
            obj = VBAInfoContainer.extract_from(stream, obj.rec_head)
        else:
            log.debug('skipping over {} Byte in DocInfoListSubContainerOrAtom'
                      .format(obj.rec_head.rec_len))
            log.debug('start at pos {}'.format(stream.tell()))
            stream.seek(obj.rec_head.rec_len, os.SEEK_CUR)
            log.debug('now at pos {}'.format(stream.tell()))
        return obj

    def check_validity(self):
        """ can be any of multiple types; return list of found problems

        Fixed: the original call passed only (value, expected) to check_value
        -- every other call site uses (name, value, expected) -- and the
        resulting error list was discarded instead of returned.
        """
        return self.check_value('recType', self.rec_head.rec_type,
                                self.VALID_RECORD_TYPES)
  610 +
  611 +
570 class DocInfoListContainer(PptType): 612 class DocInfoListContainer(PptType):
571 """ information about the document and document display settings 613 """ information about the document and document display settings
572 614
@@ -579,6 +621,40 @@ class DocInfoListContainer(PptType): @@ -579,6 +621,40 @@ class DocInfoListContainer(PptType):
579 def __init__(self): 621 def __init__(self):
580 super(DocInfoListContainer, self).__init__() 622 super(DocInfoListContainer, self).__init__()
581 623
  624 + @classmethod
  625 + def extract_from(clz, stream):
  626 + """ build instance with info read from stream """
  627 +
  628 + log.debug('Parsing DocInfoListContainer from stream')
  629 + obj = clz()
  630 + obj.read_rec_head(stream)
  631 +
  632 + # rgChildRec (variable): An array of DocInfoListSubContainerOrAtom
  633 + # records (section 2.4.5) that specifies information about the document
  634 + # or how the document is displayed. The size, in bytes, of the array is
  635 + # specified by rh.recLen
  636 + curr_pos = stream.tell()
  637 + end_pos = curr_pos + obj.rec_head.rec_len
  638 + log.debug('start reading at pos {}, will read until {}'
  639 + .format(curr_pos, end_pos))
  640 + bytes_read = 0
  641 + obj.rg_child_rec = []
  642 +
  643 + while curr_pos < end_pos:
  644 + new_obj = DocInfoListSubContainerOrAtom().extract_from(stream)
  645 + obj.rg_child_rec.append(new_obj)
  646 + curr_pos = stream.tell()
  647 + log.debug('now at pos {}'.format(curr_pos))
  648 +
  649 + log.debug('reached end pos {} ({}). stop reading DocInfoListContainer'
  650 + .format(end_pos, curr_pos))
  651 +
  652 + def check_validity(self):
  653 + errs = self.check_rec_head()
  654 + for obj in self.rg_child_rec:
  655 + errs.extend(obj.check_validity())
  656 + return errs
  657 +
582 658
583 class DocumentContainer(PptType): 659 class DocumentContainer(PptType):
584 """ a DocumentContainer record 660 """ a DocumentContainer record
@@ -618,26 +694,32 @@ class DocumentContainer(PptType): @@ -618,26 +694,32 @@ class DocumentContainer(PptType):
618 694
619 this container contains lots of data we are not interested in. 695 this container contains lots of data we are not interested in.
620 """ 696 """
  697 +
  698 + log.debug('Parsing DocumentContainer from stream')
621 obj = clz() 699 obj = clz()
622 700
623 # parse record header 701 # parse record header
624 obj.read_rec_head(stream) 702 obj.read_rec_head(stream)
  703 + log.info('validity: {} errs'.format(len(obj.check_rec_head())))
625 704
626 # documentAtom (48 bytes): A DocumentAtom record (section 2.4.2) that 705 # documentAtom (48 bytes): A DocumentAtom record (section 2.4.2) that
627 # specifies size information for presentation slides and notes slides. 706 # specifies size information for presentation slides and notes slides.
628 obj.document_atom = DummyType('DocumentAtom', 0x03E9, rec_ver=0x1, 707 obj.document_atom = DummyType('DocumentAtom', 0x03E9, rec_ver=0x1,
629 rec_len=0x28).extract_from(stream) 708 rec_len=0x28).extract_from(stream)
  709 + log.info('validity: {} errs'.format(len(obj.document_atom.check_validity())))
630 710
631 # exObjList (variable): An optional ExObjListContainer record (section 711 # exObjList (variable): An optional ExObjListContainer record (section
632 # 2.10.1) that specifies the list of external objects in the document. 712 # 2.10.1) that specifies the list of external objects in the document.
633 obj.ex_obj_list = DummyType('ExObjListContainer', 0x0409, rec_ver=0xF)\ 713 obj.ex_obj_list = DummyType('ExObjListContainer', 0x0409, rec_ver=0xF)\
634 .extract_from(stream) 714 .extract_from(stream)
  715 + log.info('validity: {} errs'.format(len(obj.ex_obj_list.check_validity())))
635 716
636 # documentTextInfo (variable): A DocumentTextInfoContainer record 717 # documentTextInfo (variable): A DocumentTextInfoContainer record
637 # (section 2.9.1) that specifies the default text styles for the 718 # (section 2.9.1) that specifies the default text styles for the
638 # document. 719 # document.
639 obj.document_text_info = DummyType('DocumentTextInfoContainer', 0x03F2, 720 obj.document_text_info = DummyType('DocumentTextInfoContainer', 0x03F2,
640 rec_ver=0xF).extract_from(stream) 721 rec_ver=0xF).extract_from(stream)
  722 + log.info('validity: {} errs'.format(len(obj.document_text_info.check_validity())))
641 723
642 # soundCollection (variable): An optional SoundCollectionContainer 724 # soundCollection (variable): An optional SoundCollectionContainer
643 # record (section 2.4.16.1) that specifies the list of sounds in the 725 # record (section 2.4.16.1) that specifies the list of sounds in the
@@ -645,17 +727,20 @@ class DocumentContainer(PptType): @@ -645,17 +727,20 @@ class DocumentContainer(PptType):
645 obj.sound_collection = DummyType('SoundCollectionContainer', 0x07E4, 727 obj.sound_collection = DummyType('SoundCollectionContainer', 0x07E4,
646 rec_ver=0xF, rec_instance=0x005)\ 728 rec_ver=0xF, rec_instance=0x005)\
647 .extract_from(stream) 729 .extract_from(stream)
  730 + log.info('validity: {} errs'.format(len(obj.sound_collection.check_validity())))
648 731
649 # drawingGroup (variable): A DrawingGroupContainer record (section 732 # drawingGroup (variable): A DrawingGroupContainer record (section
650 # 2.4.3) that specifies drawing information for the document. 733 # 2.4.3) that specifies drawing information for the document.
651 obj.drawing_group = DummyType('DrawingGroupContainer', 0x040B, 734 obj.drawing_group = DummyType('DrawingGroupContainer', 0x040B,
652 rec_ver=0xF).extract_from(stream) 735 rec_ver=0xF).extract_from(stream)
  736 + log.info('validity: {} errs'.format(len(obj.drawing_group.check_validity())))
653 737
654 # masterList (variable): A MasterListWithTextContainer record (section 738 # masterList (variable): A MasterListWithTextContainer record (section
655 # 2.4.14.1) that specifies the list of main master slides and title 739 # 2.4.14.1) that specifies the list of main master slides and title
656 # master slides. 740 # master slides.
657 obj.master_list = DummyType('MasterListWithContainer', 0x0FF0, 741 obj.master_list = DummyType('MasterListWithContainer', 0x0FF0,
658 rec_ver=0xF).extract_from(stream) 742 rec_ver=0xF).extract_from(stream)
  743 + log.info('validity: {} errs'.format(len(obj.master_list.check_validity())))
659 744
660 # docInfoList (variable): An optional DocInfoListContainer record 745 # docInfoList (variable): An optional DocInfoListContainer record
661 # (section 2.4.4) that specifies additional document information. 746 # (section 2.4.4) that specifies additional document information.
@@ -747,15 +832,16 @@ class VBAInfoContainer(PptType): @@ -747,15 +832,16 @@ class VBAInfoContainer(PptType):
747 self.vba_info_atom = None 832 self.vba_info_atom = None
748 833
749 @classmethod 834 @classmethod
750 - def extract_from(clz, stream): 835 + def extract_from(clz, stream, rec_head):
  836 + """ since can determine this type only after reading header, it is arg
  837 + """
751 log.debug('parsing VBAInfoContainer') 838 log.debug('parsing VBAInfoContainer')
752 obj = clz() 839 obj = clz()
753 - obj.read_rec_head() 840 + obj.rec_head = rec_head
754 obj.vba_info_atom = VBAInfoAtom.extract_from(stream) 841 obj.vba_info_atom = VBAInfoAtom.extract_from(stream)
755 return obj 842 return obj
756 843
757 - def check_validty(self):  
758 - 844 + def check_validity(self):
759 errs = self.check_rec_head(length=0x14) 845 errs = self.check_rec_head(length=0x14)
760 errs.extend(self.vba_info_atom.check_validity()) 846 errs.extend(self.vba_info_atom.check_validity())
761 return errs 847 return errs
@@ -768,6 +854,7 @@ class VBAInfoAtom(PptType): @@ -768,6 +854,7 @@ class VBAInfoAtom(PptType):
768 """ 854 """
769 855
770 RECORD_TYPE = 0x0400 856 RECORD_TYPE = 0x0400
  857 + RECORD_VERSION = 0x2
771 858
772 def __init__(self): 859 def __init__(self):
773 super(VBAInfoAtom, self).__init__() 860 super(VBAInfoAtom, self).__init__()
@@ -803,9 +890,9 @@ class VBAInfoAtom(PptType): @@ -803,9 +890,9 @@ class VBAInfoAtom(PptType):
803 errs = self.check_rec_head(length=0x14) 890 errs = self.check_rec_head(length=0x14)
804 891
805 # must be 0 or 1: 892 # must be 0 or 1:
806 - errs.extend(self.check_range('fHasMacros', self.f_has_macros, None, 2)  
807 - errs.extend(self.check_value('version', self.version, 2)  
808 - return errs 893 + errs.extend(self.check_range('fHasMacros', self.f_has_macros, None, 2))
  894 + errs.extend(self.check_value('version', self.version, 2))
  895 + return errs
809 896
810 # === PptParser =============================================================== 897 # === PptParser ===============================================================
811 898
@@ -919,6 +1006,9 @@ class PptParser(object): @@ -919,6 +1006,9 @@ class PptParser(object):
919 log.warning('re-reading and overwriting ' 1006 log.warning('re-reading and overwriting '
920 'previously read persist_object_directory') 1007 'previously read persist_object_directory')
921 1008
  1009 + # Step 1: Read the CurrentUserAtom record (section 2.3.2) from the
  1010 + # Current User Stream (section 2.1.1). All seek operations in the steps
  1011 + # that follow this step are in the PowerPoint Document Stream.
922 if self.current_user_atom is None: 1012 if self.current_user_atom is None:
923 self.parse_current_user() 1013 self.parse_current_user()
924 1014
@@ -931,9 +1021,17 @@ class PptParser(object): @@ -931,9 +1021,17 @@ class PptParser(object):
931 try: 1021 try:
932 log.debug('opening stream') 1022 log.debug('opening stream')
933 stream = self.ole.openstream(MAIN_STREAM_NAME) 1023 stream = self.ole.openstream(MAIN_STREAM_NAME)
  1024 +
  1025 + # Repeat steps 3 through 6 until offsetLastEdit is 0x00000000.
934 while offset != 0: 1026 while offset != 0:
935 1027
  1028 + # Step 2: Seek, in the PowerPoint Document Stream, to the
  1029 + # offset specified by the offsetToCurrentEdit field of the
  1030 + # CurrentUserAtom record identified in step 1.
936 stream.seek(offset, os.SEEK_SET) 1031 stream.seek(offset, os.SEEK_SET)
  1032 +
  1033 + # Step 3: Read the UserEditAtom record at the current offset.
  1034 + # Let this record be a live record.
937 user_edit = UserEditAtom.extract_from(stream, is_encrypted) 1035 user_edit = UserEditAtom.extract_from(stream, is_encrypted)
938 if self.newest_user_edit is None: 1036 if self.newest_user_edit is None:
939 self.newest_user_edit = user_edit 1037 self.newest_user_edit = user_edit
@@ -948,10 +1046,15 @@ class PptParser(object): @@ -948,10 +1046,15 @@ class PptParser(object):
948 if errs and self.fast_fail: 1046 if errs and self.fast_fail:
949 raise errs[0] 1047 raise errs[0]
950 1048
  1049 + # Step 4: Seek to the offset specified by the
  1050 + # offsetPersistDirectory field of the UserEditAtom record
  1051 + # identified in step 3.
951 log.debug('seeking to pos {}' 1052 log.debug('seeking to pos {}'
952 .format(user_edit.offset_persist_directory)) 1053 .format(user_edit.offset_persist_directory))
953 stream.seek(user_edit.offset_persist_directory, os.SEEK_SET) 1054 stream.seek(user_edit.offset_persist_directory, os.SEEK_SET)
954 1055
  1056 + # Step 5: Read the PersistDirectoryAtom record at the current
  1057 + # offset. Let this record be a live record.
955 persist_dir_atom = PersistDirectoryAtom.extract_from(stream) 1058 persist_dir_atom = PersistDirectoryAtom.extract_from(stream)
956 1059
957 log.debug('checking validity') 1060 log.debug('checking validity')
@@ -965,14 +1068,37 @@ class PptParser(object): @@ -965,14 +1068,37 @@ class PptParser(object):
965 if errs and self.fast_fail: 1068 if errs and self.fast_fail:
966 raise errs[0] 1069 raise errs[0]
967 1070
  1071 +
  1072 + # Construct the complete persist object directory for this file
  1073 + # as follows:
  1074 + # - For each PersistDirectoryAtom record previously identified
  1075 + # in step 5, add the persist object identifier and persist
  1076 + # object stream offset pairs to the persist object directory
  1077 + # starting with the PersistDirectoryAtom record last
  1078 + # identified, that is, the one closest to the beginning of the
  1079 + # stream.
  1080 + # - Continue adding these pairs to the persist object directory
  1081 + # for each PersistDirectoryAtom record in the reverse order
  1082 + # that they were identified in step 5; that is, the pairs from
  1083 + # the PersistDirectoryAtom record closest to the end of the
  1084 + # stream are added last.
  1085 + # - When adding a new pair to the persist object directory, if
  1086 + # the persist object identifier already exists in the persist
  1087 + # object directory, the persist object stream offset from the
  1088 + # new pair replaces the existing persist object stream offset
  1089 + # for that persist object identifier.
968 for entry in persist_dir_atom.rg_persist_dir_entry: 1090 for entry in persist_dir_atom.rg_persist_dir_entry:
969 - log.debug('saving {} offsets for persist_id {}'  
970 - .format(len(entry.rg_persist_offset),  
971 - entry.persist_id))  
972 - self.persist_object_directory[entry.persist_id] = \  
973 - entry.rg_persist_offset 1091 + last_id = entry.persist_id+len(entry.rg_persist_offset)-1
  1092 + log.debug('for persist IDs {}-{}, save offsets {}'
  1093 + .format(entry.persist_id, last_id,
  1094 + entry.rg_persist_offset))
  1095 + for count, offset in enumerate(entry.rg_persist_offset):
  1096 + self.persist_object_directory[entry.persist_id+count] \
  1097 + = offset
974 1098
975 # check for more 1099 # check for more
  1100 + # Step 6: Seek to the offset specified by the offsetLastEdit
  1101 + # field in the UserEditAtom record identified in step 3.
976 offset = user_edit.offset_last_edit 1102 offset = user_edit.offset_last_edit
977 except Exception: 1103 except Exception:
978 if self.fast_fail: 1104 if self.fast_fail:
@@ -985,27 +1111,36 @@ class PptParser(object): @@ -985,27 +1111,36 @@ class PptParser(object):
985 stream.close() 1111 stream.close()
986 1112
987 def parse_document_persist_object(self): 1113 def parse_document_persist_object(self):
988 - """ """ 1114 + """ Part 2: Identify the document persist object """
989 if self.document_persist_obj is not None: 1115 if self.document_persist_obj is not None:
990 log.warning('re-reading and overwriting ' 1116 log.warning('re-reading and overwriting '
991 'previously read document_persist_object') 1117 'previously read document_persist_object')
992 1118
  1119 + # Step 1: Read the docPersistIdRef field of the UserEditAtom record
  1120 + # first identified in step 3 of Part 1, that is, the UserEditAtom
  1121 + # record closest to the end of the stream.
993 if self.persist_object_directory is None: 1122 if self.persist_object_directory is None:
994 self.parse_persist_object_directory() 1123 self.parse_persist_object_directory()
995 1124
996 - # find the offset of the document container 1125 + # Step 2: Lookup the value of the docPersistIdRef field in the persist
  1126 + # object directory constructed in step 8 of Part 1 to find the stream
  1127 + # offset of a persist object.
997 newest_ref = self.newest_user_edit.doc_persist_id_ref 1128 newest_ref = self.newest_user_edit.doc_persist_id_ref
998 offset = self.persist_object_directory[newest_ref] 1129 offset = self.persist_object_directory[newest_ref]
999 - raise NotImplementedError('should have 1 offset here!') 1130 + log.debug('newest user edit ID is {}, offset is {}'
  1131 + .format(newest_ref, offset))
1000 1132
1001 stream = None 1133 stream = None
1002 1134
1003 try: 1135 try:
  1136 + # Step 3: Seek to the stream offset specified in step 2.
1004 log.debug('opening stream') 1137 log.debug('opening stream')
1005 stream = self.ole.openstream(MAIN_STREAM_NAME) 1138 stream = self.ole.openstream(MAIN_STREAM_NAME)
1006 - log.debug('stream pos: {}'.format(stream.tell()))  
1007 - stream.seek(offset)  
1008 - log.debug('seek by {} to {}'.format(offset, stream.tell())) 1139 + log.debug('seek to {}'.format(offset))
  1140 + stream.seek(offset, os.SEEK_SET)
  1141 +
  1142 + # Step 4: Read the DocumentContainer record at the current offset.
  1143 + # Let this record be a live record.
1009 self.document_persist_obj = DocumentContainer.extract_from(stream) 1144 self.document_persist_obj = DocumentContainer.extract_from(stream)
1010 except Exception: 1145 except Exception:
1011 if self.fast_fail: 1146 if self.fast_fail:
@@ -1032,17 +1167,20 @@ class PptParser(object): @@ -1032,17 +1167,20 @@ class PptParser(object):
def test():
    """ for testing and debugging: parse every .ppt file in the current dir """

    from glob import glob

    # setup logging
    logging.basicConfig(level=logging.DEBUG,
                        format='%(levelname)-8s %(message)s')
    log.setLevel(logging.NOTSET)

    for file_name in glob('*.ppt'):
        log.info('-' * 72)
        log.info('test file: {}'.format(file_name))

        # parse; fast_fail=False collects errors instead of raising
        parser = PptParser(file_name, fast_fail=False)
        parser.parse_document_persist_object()
1046 1184
1047 1185
1048 if __name__ == '__main__': 1186 if __name__ == '__main__':