Commit 8ee2016145a6fba3626cf52fa7004651c39b0e13

Authored by Christian Herdtweck
1 parent 0a8eace5

Successfully found and parsed VBAInfoContainer and VBAInfoAtom, but still no VBA code extracted

Showing 1 changed file with 47 additions and 8 deletions
oletools/ppt_parser.py
... ... @@ -838,23 +838,28 @@ class VBAInfoContainer(PptType):
838 838 RECORD_TYPE = 0x03FF
839 839 RECORD_VERSION = 0xF
840 840 RECORD_INSTANCE = 0x001
  841 + RECORD_LENGTH = 0x14
841 842  
842 843 def __init__(self):
843 844 super(VBAInfoContainer, self).__init__()
844 845 self.vba_info_atom = None
845 846  
846 847 @classmethod
847   - def extract_from(clz, stream, rec_head):
  848 + def extract_from(clz, stream, rec_head=None):
848 849 """ this type can only be determined after reading the record header, so an already-parsed header may be passed in as rec_head
849 850 """
850 851 log.debug('parsing VBAInfoContainer')
851 852 obj = clz()
852   - obj.rec_head = rec_head
  853 + if rec_head is None:
  854 + obj.read_rec_head(stream)
  855 + else:
  856 + log.debug('skip parsing of RecordHead')
  857 + obj.rec_head = rec_head
853 858 obj.vba_info_atom = VBAInfoAtom.extract_from(stream)
854 859 return obj
855 860  
856 861 def check_validity(self):
857   - errs = self.check_rec_head(length=0x14)
  862 + errs = self.check_rec_head(length=self.RECORD_LENGTH)
858 863 errs.extend(self.vba_info_atom.check_validity())
859 864 return errs
860 865  
... ... @@ -867,6 +872,7 @@ class VBAInfoAtom(PptType):
867 872  
868 873 RECORD_TYPE = 0x0400
869 874 RECORD_VERSION = 0x2
  875 + RECORD_LENGTH = 0x0C
870 876  
871 877 def __init__(self):
872 878 super(VBAInfoAtom, self).__init__()
... ... @@ -878,7 +884,7 @@ class VBAInfoAtom(PptType):
878 884 def extract_from(clz, stream):
879 885 log.debug('parsing VBAInfoAtom')
880 886 obj = clz()
881   - obj.read_rec_head()
  887 + obj.read_rec_head(stream)
882 888  
883 889 # persistIdRef (4 bytes): A PersistIdRef (section 2.2.21) that
884 890 # specifies the value to look up in the persist object directory to
... ... @@ -897,9 +903,9 @@ class VBAInfoAtom(PptType):
897 903  
898 904 return obj
899 905  
900   - def check_validty(self):
  906 + def check_validity(self):
901 907  
902   - errs = self.check_rec_head(length=0x14)
  908 + errs = self.check_rec_head(length=self.RECORD_LENGTH)
903 909  
904 910 # must be 0 or 1:
905 911 errs.extend(self.check_range('fHasMacros', self.f_has_macros, None, 2))
... ... @@ -1184,11 +1190,13 @@ class PptParser(object):
1184 1190 BUF_SIZE = 1024
1185 1191  
1186 1192 pattern = RecordHeader.generate(
1187   - VBAInfoContainer.RECORD_TYPE, rec_len=0x14,
  1193 + VBAInfoContainer.RECORD_TYPE,
  1194 + rec_len=VBAInfoContainer.RECORD_LENGTH,
1188 1195 rec_instance=VBAInfoContainer.RECORD_INSTANCE,
1189 1196 rec_ver=VBAInfoContainer.RECORD_VERSION) \
1190 1197 + RecordHeader.generate(
1191   - VBAInfoAtom.RECORD_TYPE, rec_len=0xC,
  1198 + VBAInfoAtom.RECORD_TYPE,
  1199 + rec_len=VBAInfoAtom.RECORD_LENGTH,
1192 1200 rec_instance=VBAInfoAtom.RECORD_INSTANCE,
1193 1201 rec_ver=VBAInfoAtom.RECORD_VERSION)
1194 1202 pattern_len = len(pattern)
... ... @@ -1200,7 +1208,10 @@ class PptParser(object):
1200 1208 try:
1201 1209 log.debug('opening stream')
1202 1210 stream = self.ole.openstream(MAIN_STREAM_NAME)
  1211 +
  1212 + # look for candidate positions
1203 1213 n_reads = 0
  1214 + candidates = []
1204 1215 while True:
1205 1216 start_pos = stream.tell()
1206 1217 n_reads += 1
... ... @@ -1210,15 +1221,43 @@ class PptParser(object):
1210 1221 idx = buf.find(pattern)
1211 1222 while idx != -1:
1212 1223 log.info('found pattern at index {}'.format(start_pos+idx))
  1224 + candidates.append(start_pos+idx)
1213 1225 idx = buf.find(pattern, idx+1)
1214 1226  
1215 1227 if len(buf) == BUF_SIZE:
  1228 + # move back a bit to avoid splitting of pattern through buf
1216 1229 stream.seek(-1 * pattern_len, os.SEEK_CUR)
1217 1230 else:
1218 1231 log.debug('reached end of buf (read {}<{}) after {} reads'
1219 1232 .format(len(buf), BUF_SIZE, n_reads))
1220 1233 break
1221 1234  
  1235 + # try parse
  1236 + for idx in candidates:
  1237 + # assume that in stream at idx there is a VBAInfoContainer
  1238 + stream.seek(idx)
  1239 + log.info('extracting at idx {}'.format(idx))
  1240 + try:
  1241 + container = VBAInfoContainer.extract_from(stream)
  1242 + except Exception:
  1243 + self._log_exception()
  1244 + continue
  1245 +
  1246 + errs = container.check_validity()
  1247 + if errs:
  1248 + log.warning('check_validity found {} issues'.format(len(errs)))
  1249 + else:
  1250 + log.info('container is ok')
  1251 + atom = container.vba_info_atom
  1252 + log.info('persist id ref is {}, has_macros {}, version {}'
  1253 + .format(atom.persist_id_ref, atom.f_has_macros,
  1254 + atom.version))
  1255 + for err in errs:
  1256 + log.warning('check_validity(VBAInfoContainer): {}'
  1257 + .format(err))
  1258 + if errs and self.fast_fail:
  1259 + raise errs[0]
  1260 +
1222 1261 finally:
1223 1262 if stream is not None:
1224 1263 log.debug('closing stream')
... ...