Commit 62c927a86f77d0f34cd5287c33f2bf5fca82591f

Authored by Christian Herdtweck
1 parent 63dafd09

started vba types

Showing 1 changed file with 103 additions and 23 deletions
oletools/ppt_parser.py
@@ -18,6 +18,7 @@ References: @@ -18,6 +18,7 @@ References:
18 # TODO: 18 # TODO:
19 # - make CurrentUserAtom and UserEditAtom PptTypes; adjust parse 19 # - make CurrentUserAtom and UserEditAtom PptTypes; adjust parse
20 # - make stream optional in PptUnexpectedData 20 # - make stream optional in PptUnexpectedData
  21 +# - could speed up parsing by using fewer, bigger struct.unpack calls?
21 # - license 22 # - license
22 # - create an AtomBase class that defines check_value and parses RecordHeader? 23 # - create an AtomBase class that defines check_value and parses RecordHeader?
23 # 24 #
@@ -64,7 +65,21 @@ class PptUnexpectedData(Exception): @@ -64,7 +65,21 @@ class PptUnexpectedData(Exception):
64 super(PptUnexpectedData, self).__init__(self.msg) 65 super(PptUnexpectedData, self).__init__(self.msg)
65 66
66 67
67 -# === STRUCTS ================================================================= 68 +# === HELPERS =================================================================
  69 +
  70 +def read_1(stream):
  71 + """ read 1 byte from stream """
  72 + return struct.unpack('<B', stream.read(1))[0]
  73 +
  74 +
  75 +def read_2(stream):
  76 + """ read 2 byte (short) from stream """
  77 + return struct.unpack('<H', stream.read(2))[0]
  78 +
  79 +
  80 +def read_4(stream):
  81 + """ read 4 byte (long) from stream """
  82 + return struct.unpack('<L', stream.read(4))[0]
68 83
69 84
70 def check_value(name, value, expected): 85 def check_value(name, value, expected):
@@ -82,6 +97,8 @@ def check_value(name, value, expected): @@ -82,6 +97,8 @@ def check_value(name, value, expected):
82 '{0:04X}'.format(value), '{0:04X}'.format(expected)) 97 '{0:04X}'.format(value), '{0:04X}'.format(expected))
83 98
84 99
  100 +# === STRUCTS =================================================================
  101 +
85 class RecordHeader(object): 102 class RecordHeader(object):
86 """ a record header, at start of many types found in ppt files 103 """ a record header, at start of many types found in ppt files
87 104
@@ -214,33 +231,12 @@ class PptType(object): @@ -214,33 +231,12 @@ class PptType(object):
214 raise NotImplementedError('abstract base function!') 231 raise NotImplementedError('abstract base function!')
215 232
216 def __init__(self, stream_name=MAIN_STREAM_NAME): 233 def __init__(self, stream_name=MAIN_STREAM_NAME):
217 - self.stream = None  
218 self.stream_name = stream_name 234 self.stream_name = stream_name
219 self.rec_head = None 235 self.rec_head = None
220 236
221 def read_rec_head(self, stream): 237 def read_rec_head(self, stream):
222 self.rec_head = RecordHeader.extract_from(stream) 238 self.rec_head = RecordHeader.extract_from(stream)
223 239
224 - def set_stream(self, stream):  
225 - """ need to call before any read_... method """  
226 - self.stream = stream  
227 -  
228 - def unset_stream(self):  
229 - """ should call after any read_... method """  
230 - self.stream = None  
231 -  
232 - def read_1(self):  
233 - """ read 1 byte from stream """  
234 - return struct.unpack('<B', self.stream.read(1))[0]  
235 -  
236 - def read_2(self):  
237 - """ read 2 byte (short) from stream """  
238 - return struct.unpack('<H', self.stream.read(2))[0]  
239 -  
240 - def read_4(self):  
241 - """ read 4 byte (long) from stream """  
242 - return struct.unpack('<L', self.stream.read(4))[0]  
243 -  
244 def check_validity(self): 240 def check_validity(self):
245 """ to be overwritten in subclasses 241 """ to be overwritten in subclasses
246 242
@@ -737,6 +733,82 @@ class DocumentContainer(PptType): @@ -737,6 +733,82 @@ class DocumentContainer(PptType):
737 errs.extend(self.doc_info_list.check_validity()) 733 errs.extend(self.doc_info_list.check_validity())
738 return errs 734 return errs
739 735
  736 +
  737 +class VBAInfoContainer(PptType):
  738 + """ A container record that specifies VBA information for the document.
  739 +
  740 + https://msdn.microsoft.com/en-us/library/dd952168%28v=office.12%29.aspx
  741 + """
  742 +
  743 + RECORD_TYPE = 0x03FF
  744 + RECORD_VERSION = 0xF
  745 + RECORD_INSTANCE = 0x001
  746 +
  747 + def __init__(self):
  748 + super(VBAInfoContainer, self).__init__()
  749 + self.vba_info_atom = None
  750 +
  751 + @classmethod
  752 + def extract_from(clz, stream):
  753 + log.debug('parsing VBAInfoContainer')
  754 + obj = clz()
  755 + obj.read_rec_head()
  756 + obj.vba_info_atom = VBAInfoAtom.extract_from(stream)
  757 + return obj
  758 +
  759 + def check_validty(self):
  760 +
  761 + errs = self.check_rec_head(length=0x14)
  762 + errs.extend(self.vba_info_atom.check_validity())
  763 + return errs
  764 +
  765 +
  766 +class VBAInfoAtom(PptType):
  767 + """ An atom record that specifies a reference to the VBA project storage.
  768 +
  769 + https://msdn.microsoft.com/en-us/library/dd948874%28v=office.12%29.aspx
  770 + """
  771 +
  772 + RECORD_TYPE = 0x0400
  773 +
  774 + def __init__(self):
  775 + super(VBAInfoAtom, self).__init__()
  776 + self.persist_id_ref = None
  777 + self.f_has_macros = None
  778 + self.version = None
  779 +
  780 + @classmethod
  781 + def extract_from(clz, stream):
  782 + log.debug('parsing VBAInfoAtom')
  783 + obj = clz()
  784 + obj.read_rec_head()
  785 +
  786 + # persistIdRef (4 bytes): A PersistIdRef (section 2.2.21) that
  787 + # specifies the value to look up in the persist object directory to
  788 + # find the offset of a VbaProjectStg record (section 2.10.40).
  789 + obj.persist_id_ref = read_4(stream)
  790 +
  791 + # fHasMacros (4 bytes): An unsigned integer that specifies whether the
  792 + # VBA project storage contains data. It MUST be 0 (empty vba storage)
  793 + # or 1 (vba storage contains data)
  794 + obj.f_has_macros = read_4(stream)
  795 +
  796 + # version (4 bytes): An unsigned integer that specifies the VBA runtime
  797 + # version that generated the VBA project storage. It MUST be
  798 + # 0x00000002.
  799 + obj.version = read_4(stream)
  800 +
  801 + return obj
  802 +
  803 + def check_validty(self):
  804 +
  805 + errs = self.check_rec_head(length=0x14)
  806 +
  807 + # must be 0 or 1:
  808 + errs.extend(self.check_range('fHasMacros', self.f_has_macros, None, 2)
  809 + errs.extend(self.check_value('version', self.version, 2)
  810 + return errs
  811 +
740 # === PptParser =============================================================== 812 # === PptParser ===============================================================
741 813
742 814
@@ -762,6 +834,7 @@ class PptParser(object): @@ -762,6 +834,7 @@ class PptParser(object):
762 self.fast_fail = fast_fail 834 self.fast_fail = fast_fail
763 835
764 self.current_user_atom = None 836 self.current_user_atom = None
  837 + self.newest_user_edit = None
765 self.document_persist_obj = None 838 self.document_persist_obj = None
766 self.persist_object_directory = None 839 self.persist_object_directory = None
767 840
@@ -845,6 +918,7 @@ class PptParser(object): @@ -845,6 +918,7 @@ class PptParser(object):
845 offset = self.current_user_atom.offset_to_current_edit 918 offset = self.current_user_atom.offset_to_current_edit
846 is_encrypted = self.current_user_atom.is_encrypted() 919 is_encrypted = self.current_user_atom.is_encrypted()
847 self.persist_object_directory = {} 920 self.persist_object_directory = {}
  921 + self.newest_user_edit = None
848 922
849 stream = None 923 stream = None
850 try: 924 try:
@@ -854,6 +928,8 @@ class PptParser(object): @@ -854,6 +928,8 @@ class PptParser(object):
854 928
855 stream.seek(offset, os.SEEK_SET) 929 stream.seek(offset, os.SEEK_SET)
856 user_edit = UserEditAtom.extract_from(stream, is_encrypted) 930 user_edit = UserEditAtom.extract_from(stream, is_encrypted)
  931 + if self.newest_user_edit is None:
  932 + self.newest_user_edit = user_edit
857 933
858 log.debug('checking validity') 934 log.debug('checking validity')
859 errs = user_edit.check_validity() 935 errs = user_edit.check_validity()
@@ -910,7 +986,11 @@ class PptParser(object): @@ -910,7 +986,11 @@ class PptParser(object):
910 if self.persist_object_directory is None: 986 if self.persist_object_directory is None:
911 self.parse_persist_object_directory() 987 self.parse_persist_object_directory()
912 988
913 - offset = None # TODO: read from object directory 989 + # find the offset of the document container
  990 + newest_ref = self.newest_user_edit.doc_persist_id_ref
  991 + offset = self.persist_object_directory[newest_ref]
  992 + raise NotImplementedError('should have 1 offset here!')
  993 +
914 stream = None 994 stream = None
915 995
916 try: 996 try: