Commit 62c927a86f77d0f34cd5287c33f2bf5fca82591f
1 parent
63dafd09
started vba types
Showing
1 changed file
with
103 additions
and
23 deletions
oletools/ppt_parser.py
| ... | ... | @@ -18,6 +18,7 @@ References: |
| 18 | 18 | # TODO: |
| 19 | 19 | # - make CurrentUserAtom and UserEditAtom PptTypes; adjust parse |
| 20 | 20 | # - make stream optional in PptUnexpectedData |
| 21 | +# - can speed up by using fewer, larger struct.unpack calls? | |
| 21 | 22 | # - license |
| 22 | 23 | # - create an AtomBase class that defines check_value and parses RecordHeader? |
| 23 | 24 | # |
| ... | ... | @@ -64,7 +65,21 @@ class PptUnexpectedData(Exception): |
| 64 | 65 | super(PptUnexpectedData, self).__init__(self.msg) |
| 65 | 66 | |
| 66 | 67 | |
| 67 | -# === STRUCTS ================================================================= | |
| 68 | +# === HELPERS ================================================================= | |
| 69 | + | |
def read_1(stream):
    """ Read 1 byte from stream; return it as an unsigned integer. """
    raw = stream.read(1)
    (value,) = struct.unpack('<B', raw)
    return value
| 73 | + | |
| 74 | + | |
def read_2(stream):
    """ Read 2 bytes from stream; return them as a little-endian unsigned
    short.
    """
    raw = stream.read(2)
    (value,) = struct.unpack('<H', raw)
    return value
| 78 | + | |
| 79 | + | |
def read_4(stream):
    """ Read 4 bytes from stream; return them as a little-endian unsigned
    long.
    """
    raw = stream.read(4)
    (value,) = struct.unpack('<L', raw)
    return value
| 68 | 83 | |
| 69 | 84 | |
| 70 | 85 | def check_value(name, value, expected): |
| ... | ... | @@ -82,6 +97,8 @@ def check_value(name, value, expected): |
| 82 | 97 | '{0:04X}'.format(value), '{0:04X}'.format(expected)) |
| 83 | 98 | |
| 84 | 99 | |
| 100 | +# === STRUCTS ================================================================= | |
| 101 | + | |
| 85 | 102 | class RecordHeader(object): |
| 86 | 103 | """ a record header, at start of many types found in ppt files |
| 87 | 104 | |
| ... | ... | @@ -214,33 +231,12 @@ class PptType(object): |
| 214 | 231 | raise NotImplementedError('abstract base function!') |
| 215 | 232 | |
| 216 | 233 | def __init__(self, stream_name=MAIN_STREAM_NAME): |
| 217 | - self.stream = None | |
| 218 | 234 | self.stream_name = stream_name |
| 219 | 235 | self.rec_head = None |
| 220 | 236 | |
| 221 | 237 | def read_rec_head(self, stream): |
| 222 | 238 | self.rec_head = RecordHeader.extract_from(stream) |
| 223 | 239 | |
| 224 | - def set_stream(self, stream): | |
| 225 | - """ need to call before any read_... method """ | |
| 226 | - self.stream = stream | |
| 227 | - | |
| 228 | - def unset_stream(self): | |
| 229 | - """ should call after any read_... method """ | |
| 230 | - self.stream = None | |
| 231 | - | |
| 232 | - def read_1(self): | |
| 233 | - """ read 1 byte from stream """ | |
| 234 | - return struct.unpack('<B', self.stream.read(1))[0] | |
| 235 | - | |
| 236 | - def read_2(self): | |
| 237 | - """ read 2 byte (short) from stream """ | |
| 238 | - return struct.unpack('<H', self.stream.read(2))[0] | |
| 239 | - | |
| 240 | - def read_4(self): | |
| 241 | - """ read 4 byte (long) from stream """ | |
| 242 | - return struct.unpack('<L', self.stream.read(4))[0] | |
| 243 | - | |
| 244 | 240 | def check_validity(self): |
| 245 | 241 | """ to be overwritten in subclasses |
| 246 | 242 | |
| ... | ... | @@ -737,6 +733,82 @@ class DocumentContainer(PptType): |
| 737 | 733 | errs.extend(self.doc_info_list.check_validity()) |
| 738 | 734 | return errs |
| 739 | 735 | |
| 736 | + | |
class VBAInfoContainer(PptType):
    """ A container record that specifies VBA information for the document.

    Consists of a record header followed by a single VBAInfoAtom.

    https://msdn.microsoft.com/en-us/library/dd952168%28v=office.12%29.aspx
    """

    RECORD_TYPE = 0x03FF
    RECORD_VERSION = 0xF
    RECORD_INSTANCE = 0x001

    def __init__(self):
        super(VBAInfoContainer, self).__init__()
        self.vba_info_atom = None

    @classmethod
    def extract_from(clz, stream):
        """ Parse a VBAInfoContainer from stream at its current position.

        Reads the record header and then the contained VBAInfoAtom.

        :param stream: readable binary stream positioned at the record start
        :returns: a new, populated instance of this class
        """
        log.debug('parsing VBAInfoContainer')
        obj = clz()
        # read_rec_head needs the stream to parse the header from
        obj.read_rec_head(stream)
        obj.vba_info_atom = VBAInfoAtom.extract_from(stream)
        return obj

    def check_validity(self):
        """ Check the record header and the contained atom.

        :returns: the errors reported by check_rec_head, extended by those of
                  the contained VBAInfoAtom
        """
        # 0x14 = 8 bytes atom record header + 12 bytes atom data
        errs = self.check_rec_head(length=0x14)
        errs.extend(self.vba_info_atom.check_validity())
        return errs

    # former misspelled name, kept so existing callers do not break
    check_validty = check_validity
| 764 | + | |
| 765 | + | |
class VBAInfoAtom(PptType):
    """ An atom record that specifies a reference to the VBA project storage.

    The record data consists of three unsigned 4-byte integers:
    persistIdRef, fHasMacros and version.

    https://msdn.microsoft.com/en-us/library/dd948874%28v=office.12%29.aspx
    """

    RECORD_TYPE = 0x0400
    # MS-PPT: rh.recVer MUST be 0x2 and rh.recInstance MUST be 0x000
    RECORD_VERSION = 0x2
    RECORD_INSTANCE = 0x000

    def __init__(self):
        super(VBAInfoAtom, self).__init__()
        self.persist_id_ref = None
        self.f_has_macros = None
        self.version = None

    @classmethod
    def extract_from(clz, stream):
        """ Parse a VBAInfoAtom from stream at its current position.

        :param stream: readable binary stream positioned at the record start
        :returns: a new, populated instance of this class
        """
        log.debug('parsing VBAInfoAtom')
        obj = clz()
        # read_rec_head needs the stream to parse the header from
        obj.read_rec_head(stream)

        # persistIdRef (4 bytes): A PersistIdRef (section 2.2.21) that
        # specifies the value to look up in the persist object directory to
        # find the offset of a VbaProjectStg record (section 2.10.40).
        obj.persist_id_ref = read_4(stream)

        # fHasMacros (4 bytes): An unsigned integer that specifies whether the
        # VBA project storage contains data. It MUST be 0 (empty vba storage)
        # or 1 (vba storage contains data)
        obj.f_has_macros = read_4(stream)

        # version (4 bytes): An unsigned integer that specifies the VBA runtime
        # version that generated the VBA project storage. It MUST be
        # 0x00000002.
        obj.version = read_4(stream)

        return obj

    def check_validity(self):
        """ Check the record header and the field values.

        :returns: the errors reported by check_rec_head, extended by the
                  results of the fHasMacros and version checks
        """
        # record data is 3 fields of 4 bytes each = 0x0C bytes
        errs = self.check_rec_head(length=0x0C)

        # NOTE(review): check_range and check_value are called as methods
        # here; confirm that PptType provides them and that they return
        # lists of errors (the module-level check_value takes no self).

        # must be 0 or 1:
        errs.extend(self.check_range('fHasMacros', self.f_has_macros, None, 2))
        errs.extend(self.check_value('version', self.version, 2))
        return errs

    # former misspelled name, kept so existing callers do not break
    check_validty = check_validity
| 811 | + | |
| 740 | 812 | # === PptParser =============================================================== |
| 741 | 813 | |
| 742 | 814 | |
| ... | ... | @@ -762,6 +834,7 @@ class PptParser(object): |
| 762 | 834 | self.fast_fail = fast_fail |
| 763 | 835 | |
| 764 | 836 | self.current_user_atom = None |
| 837 | + self.newest_user_edit = None | |
| 765 | 838 | self.document_persist_obj = None |
| 766 | 839 | self.persist_object_directory = None |
| 767 | 840 | |
| ... | ... | @@ -845,6 +918,7 @@ class PptParser(object): |
| 845 | 918 | offset = self.current_user_atom.offset_to_current_edit |
| 846 | 919 | is_encrypted = self.current_user_atom.is_encrypted() |
| 847 | 920 | self.persist_object_directory = {} |
| 921 | + self.newest_user_edit = None | |
| 848 | 922 | |
| 849 | 923 | stream = None |
| 850 | 924 | try: |
| ... | ... | @@ -854,6 +928,8 @@ class PptParser(object): |
| 854 | 928 | |
| 855 | 929 | stream.seek(offset, os.SEEK_SET) |
| 856 | 930 | user_edit = UserEditAtom.extract_from(stream, is_encrypted) |
| 931 | + if self.newest_user_edit is None: | |
| 932 | + self.newest_user_edit = user_edit | |
| 857 | 933 | |
| 858 | 934 | log.debug('checking validity') |
| 859 | 935 | errs = user_edit.check_validity() |
| ... | ... | @@ -910,7 +986,11 @@ class PptParser(object): |
| 910 | 986 | if self.persist_object_directory is None: |
| 911 | 987 | self.parse_persist_object_directory() |
| 912 | 988 | |
| 913 | - offset = None # TODO: read from object directory | |
| 989 | + # find the offset of the document container | |
| 990 | + newest_ref = self.newest_user_edit.doc_persist_id_ref | |
| 991 | + offset = self.persist_object_directory[newest_ref] | |
| 992 | + raise NotImplementedError('should have 1 offset here!') | |
| 993 | + | |
| 914 | 994 | stream = None |
| 915 | 995 | |
| 916 | 996 | try: | ... | ... |