Commit 8ee2016145a6fba3626cf52fa7004651c39b0e13

Authored by Christian Herdtweck
1 parent 0a8eace5

Successfully found and parsed VBAInfoContainer and VBAInfoAtom, but still no VBA code...

Showing 1 changed file with 47 additions and 8 deletions
oletools/ppt_parser.py
@@ -838,23 +838,28 @@ class VBAInfoContainer(PptType): @@ -838,23 +838,28 @@ class VBAInfoContainer(PptType):
838 RECORD_TYPE = 0x03FF 838 RECORD_TYPE = 0x03FF
839 RECORD_VERSION = 0xF 839 RECORD_VERSION = 0xF
840 RECORD_INSTANCE = 0x001 840 RECORD_INSTANCE = 0x001
  841 + RECORD_LENGTH = 0x14
841 842
842 def __init__(self): 843 def __init__(self):
843 super(VBAInfoContainer, self).__init__() 844 super(VBAInfoContainer, self).__init__()
844 self.vba_info_atom = None 845 self.vba_info_atom = None
845 846
846 @classmethod 847 @classmethod
847 - def extract_from(clz, stream, rec_head): 848 + def extract_from(clz, stream, rec_head=None):
848 """ since can determine this type only after reading header, it is arg 849 """ since can determine this type only after reading header, it is arg
849 """ 850 """
850 log.debug('parsing VBAInfoContainer') 851 log.debug('parsing VBAInfoContainer')
851 obj = clz() 852 obj = clz()
852 - obj.rec_head = rec_head 853 + if rec_head is None:
  854 + obj.read_rec_head(stream)
  855 + else:
  856 + log.debug('skip parsing of RecordHead')
  857 + obj.rec_head = rec_head
853 obj.vba_info_atom = VBAInfoAtom.extract_from(stream) 858 obj.vba_info_atom = VBAInfoAtom.extract_from(stream)
854 return obj 859 return obj
855 860
856 def check_validity(self): 861 def check_validity(self):
857 - errs = self.check_rec_head(length=0x14) 862 + errs = self.check_rec_head(length=self.RECORD_LENGTH)
858 errs.extend(self.vba_info_atom.check_validity()) 863 errs.extend(self.vba_info_atom.check_validity())
859 return errs 864 return errs
860 865
@@ -867,6 +872,7 @@ class VBAInfoAtom(PptType): @@ -867,6 +872,7 @@ class VBAInfoAtom(PptType):
867 872
868 RECORD_TYPE = 0x0400 873 RECORD_TYPE = 0x0400
869 RECORD_VERSION = 0x2 874 RECORD_VERSION = 0x2
  875 + RECORD_LENGTH = 0x0C
870 876
871 def __init__(self): 877 def __init__(self):
872 super(VBAInfoAtom, self).__init__() 878 super(VBAInfoAtom, self).__init__()
@@ -878,7 +884,7 @@ class VBAInfoAtom(PptType): @@ -878,7 +884,7 @@ class VBAInfoAtom(PptType):
878 def extract_from(clz, stream): 884 def extract_from(clz, stream):
879 log.debug('parsing VBAInfoAtom') 885 log.debug('parsing VBAInfoAtom')
880 obj = clz() 886 obj = clz()
881 - obj.read_rec_head() 887 + obj.read_rec_head(stream)
882 888
883 # persistIdRef (4 bytes): A PersistIdRef (section 2.2.21) that 889 # persistIdRef (4 bytes): A PersistIdRef (section 2.2.21) that
884 # specifies the value to look up in the persist object directory to 890 # specifies the value to look up in the persist object directory to
@@ -897,9 +903,9 @@ class VBAInfoAtom(PptType): @@ -897,9 +903,9 @@ class VBAInfoAtom(PptType):
897 903
898 return obj 904 return obj
899 905
900 - def check_validty(self): 906 + def check_validity(self):
901 907
902 - errs = self.check_rec_head(length=0x14) 908 + errs = self.check_rec_head(length=self.RECORD_LENGTH)
903 909
904 # must be 0 or 1: 910 # must be 0 or 1:
905 errs.extend(self.check_range('fHasMacros', self.f_has_macros, None, 2)) 911 errs.extend(self.check_range('fHasMacros', self.f_has_macros, None, 2))
@@ -1184,11 +1190,13 @@ class PptParser(object): @@ -1184,11 +1190,13 @@ class PptParser(object):
1184 BUF_SIZE = 1024 1190 BUF_SIZE = 1024
1185 1191
1186 pattern = RecordHeader.generate( 1192 pattern = RecordHeader.generate(
1187 - VBAInfoContainer.RECORD_TYPE, rec_len=0x14, 1193 + VBAInfoContainer.RECORD_TYPE,
  1194 + rec_len=VBAInfoContainer.RECORD_LENGTH,
1188 rec_instance=VBAInfoContainer.RECORD_INSTANCE, 1195 rec_instance=VBAInfoContainer.RECORD_INSTANCE,
1189 rec_ver=VBAInfoContainer.RECORD_VERSION) \ 1196 rec_ver=VBAInfoContainer.RECORD_VERSION) \
1190 + RecordHeader.generate( 1197 + RecordHeader.generate(
1191 - VBAInfoAtom.RECORD_TYPE, rec_len=0xC, 1198 + VBAInfoAtom.RECORD_TYPE,
  1199 + rec_len=VBAInfoAtom.RECORD_LENGTH,
1192 rec_instance=VBAInfoAtom.RECORD_INSTANCE, 1200 rec_instance=VBAInfoAtom.RECORD_INSTANCE,
1193 rec_ver=VBAInfoAtom.RECORD_VERSION) 1201 rec_ver=VBAInfoAtom.RECORD_VERSION)
1194 pattern_len = len(pattern) 1202 pattern_len = len(pattern)
@@ -1200,7 +1208,10 @@ class PptParser(object): @@ -1200,7 +1208,10 @@ class PptParser(object):
1200 try: 1208 try:
1201 log.debug('opening stream') 1209 log.debug('opening stream')
1202 stream = self.ole.openstream(MAIN_STREAM_NAME) 1210 stream = self.ole.openstream(MAIN_STREAM_NAME)
  1211 +
  1212 + # look for candidate positions
1203 n_reads = 0 1213 n_reads = 0
  1214 + candidates = []
1204 while True: 1215 while True:
1205 start_pos = stream.tell() 1216 start_pos = stream.tell()
1206 n_reads += 1 1217 n_reads += 1
@@ -1210,15 +1221,43 @@ class PptParser(object): @@ -1210,15 +1221,43 @@ class PptParser(object):
1210 idx = buf.find(pattern) 1221 idx = buf.find(pattern)
1211 while idx != -1: 1222 while idx != -1:
1212 log.info('found pattern at index {}'.format(start_pos+idx)) 1223 log.info('found pattern at index {}'.format(start_pos+idx))
  1224 + candidates.append(start_pos+idx)
1213 idx = buf.find(pattern, idx+1) 1225 idx = buf.find(pattern, idx+1)
1214 1226
1215 if len(buf) == BUF_SIZE: 1227 if len(buf) == BUF_SIZE:
  1228 + # move back a bit to avoid splitting of pattern through buf
1216 stream.seek(-1 * pattern_len, os.SEEK_CUR) 1229 stream.seek(-1 * pattern_len, os.SEEK_CUR)
1217 else: 1230 else:
1218 log.debug('reached end of buf (read {}<{}) after {} reads' 1231 log.debug('reached end of buf (read {}<{}) after {} reads'
1219 .format(len(buf), BUF_SIZE, n_reads)) 1232 .format(len(buf), BUF_SIZE, n_reads))
1220 break 1233 break
1221 1234
  1235 + # try parse
  1236 + for idx in candidates:
  1237 + # assume that in stream at idx there is a VBAInfoContainer
  1238 + stream.seek(idx)
  1239 + log.info('extracting at idx {}'.format(idx))
  1240 + try:
  1241 + container = VBAInfoContainer.extract_from(stream)
  1242 + except Exception:
  1243 + self._log_exception()
  1244 + continue
  1245 +
  1246 + errs = container.check_validity()
  1247 + if errs:
  1248 + log.warning('check_validity found {} issues'.format(len(errs)))
  1249 + else:
  1250 + log.info('container is ok')
  1251 + atom = container.vba_info_atom
  1252 + log.info('persist id ref is {}, has_macros {}, version {}'
  1253 + .format(atom.persist_id_ref, atom.f_has_macros,
  1254 + atom.version))
  1255 + for err in errs:
  1256 + log.warning('check_validity(VBAInfoContainer): {}'
  1257 + .format(err))
  1258 + if errs and self.fast_fail:
  1259 + raise errs[0]
  1260 +
1222 finally: 1261 finally:
1223 if stream is not None: 1262 if stream is not None:
1224 log.debug('closing stream') 1263 log.debug('closing stream')