Commit 8ee2016145a6fba3626cf52fa7004651c39b0e13
1 parent 0a8eace5
successfully found and parsed VBAInfoContainer+Atom but still no VBA code...
Showing 1 changed file with 47 additions and 8 deletions
oletools/ppt_parser.py
| ... | ... | @@ -838,23 +838,28 @@ class VBAInfoContainer(PptType): |
| 838 | 838 | RECORD_TYPE = 0x03FF |
| 839 | 839 | RECORD_VERSION = 0xF |
| 840 | 840 | RECORD_INSTANCE = 0x001 |
| 841 | + RECORD_LENGTH = 0x14 | |
| 841 | 842 | |
| 842 | 843 | def __init__(self): |
| 843 | 844 | super(VBAInfoContainer, self).__init__() |
| 844 | 845 | self.vba_info_atom = None |
| 845 | 846 | |
| 846 | 847 | @classmethod |
| 847 | - def extract_from(clz, stream, rec_head): | |
| 848 | + def extract_from(clz, stream, rec_head=None): | |
| 848 | 849 | """ since can determine this type only after reading header, it is arg |
| 849 | 850 | """ |
| 850 | 851 | log.debug('parsing VBAInfoContainer') |
| 851 | 852 | obj = clz() |
| 852 | - obj.rec_head = rec_head | |
| 853 | + if rec_head is None: | |
| 854 | + obj.read_rec_head(stream) | |
| 855 | + else: | |
| 856 | + log.debug('skip parsing of RecordHead') | |
| 857 | + obj.rec_head = rec_head | |
| 853 | 858 | obj.vba_info_atom = VBAInfoAtom.extract_from(stream) |
| 854 | 859 | return obj |
| 855 | 860 | |
| 856 | 861 | def check_validity(self): |
| 857 | - errs = self.check_rec_head(length=0x14) | |
| 862 | + errs = self.check_rec_head(length=self.RECORD_LENGTH) | |
| 858 | 863 | errs.extend(self.vba_info_atom.check_validity()) |
| 859 | 864 | return errs |
| 860 | 865 | |
| ... | ... | @@ -867,6 +872,7 @@ class VBAInfoAtom(PptType): |
| 867 | 872 | |
| 868 | 873 | RECORD_TYPE = 0x0400 |
| 869 | 874 | RECORD_VERSION = 0x2 |
| 875 | + RECORD_LENGTH = 0x0C | |
| 870 | 876 | |
| 871 | 877 | def __init__(self): |
| 872 | 878 | super(VBAInfoAtom, self).__init__() |
| ... | ... | @@ -878,7 +884,7 @@ class VBAInfoAtom(PptType): |
| 878 | 884 | def extract_from(clz, stream): |
| 879 | 885 | log.debug('parsing VBAInfoAtom') |
| 880 | 886 | obj = clz() |
| 881 | - obj.read_rec_head() | |
| 887 | + obj.read_rec_head(stream) | |
| 882 | 888 | |
| 883 | 889 | # persistIdRef (4 bytes): A PersistIdRef (section 2.2.21) that |
| 884 | 890 | # specifies the value to look up in the persist object directory to |
| ... | ... | @@ -897,9 +903,9 @@ class VBAInfoAtom(PptType): |
| 897 | 903 | |
| 898 | 904 | return obj |
| 899 | 905 | |
| 900 | - def check_validty(self): | |
| 906 | + def check_validity(self): | |
| 901 | 907 | |
| 902 | - errs = self.check_rec_head(length=0x14) | |
| 908 | + errs = self.check_rec_head(length=self.RECORD_LENGTH) | |
| 903 | 909 | |
| 904 | 910 | # must be 0 or 1: |
| 905 | 911 | errs.extend(self.check_range('fHasMacros', self.f_has_macros, None, 2)) |
| ... | ... | @@ -1184,11 +1190,13 @@ class PptParser(object): |
| 1184 | 1190 | BUF_SIZE = 1024 |
| 1185 | 1191 | |
| 1186 | 1192 | pattern = RecordHeader.generate( |
| 1187 | - VBAInfoContainer.RECORD_TYPE, rec_len=0x14, | |
| 1193 | + VBAInfoContainer.RECORD_TYPE, | |
| 1194 | + rec_len=VBAInfoContainer.RECORD_LENGTH, | |
| 1188 | 1195 | rec_instance=VBAInfoContainer.RECORD_INSTANCE, |
| 1189 | 1196 | rec_ver=VBAInfoContainer.RECORD_VERSION) \ |
| 1190 | 1197 | + RecordHeader.generate( |
| 1191 | - VBAInfoAtom.RECORD_TYPE, rec_len=0xC, | |
| 1198 | + VBAInfoAtom.RECORD_TYPE, | |
| 1199 | + rec_len=VBAInfoAtom.RECORD_LENGTH, | |
| 1192 | 1200 | rec_instance=VBAInfoAtom.RECORD_INSTANCE, |
| 1193 | 1201 | rec_ver=VBAInfoAtom.RECORD_VERSION) |
| 1194 | 1202 | pattern_len = len(pattern) |
| ... | ... | @@ -1200,7 +1208,10 @@ class PptParser(object): |
| 1200 | 1208 | try: |
| 1201 | 1209 | log.debug('opening stream') |
| 1202 | 1210 | stream = self.ole.openstream(MAIN_STREAM_NAME) |
| 1211 | + | |
| 1212 | + # look for candidate positions | |
| 1203 | 1213 | n_reads = 0 |
| 1214 | + candidates = [] | |
| 1204 | 1215 | while True: |
| 1205 | 1216 | start_pos = stream.tell() |
| 1206 | 1217 | n_reads += 1 |
| ... | ... | @@ -1210,15 +1221,43 @@ class PptParser(object): |
| 1210 | 1221 | idx = buf.find(pattern) |
| 1211 | 1222 | while idx != -1: |
| 1212 | 1223 | log.info('found pattern at index {}'.format(start_pos+idx)) |
| 1224 | + candidates.append(start_pos+idx) | |
| 1213 | 1225 | idx = buf.find(pattern, idx+1) |
| 1214 | 1226 | |
| 1215 | 1227 | if len(buf) == BUF_SIZE: |
| 1228 | + # move back a bit to avoid splitting of pattern through buf | |
| 1216 | 1229 | stream.seek(-1 * pattern_len, os.SEEK_CUR) |
| 1217 | 1230 | else: |
| 1218 | 1231 | log.debug('reached end of buf (read {}<{}) after {} reads' |
| 1219 | 1232 | .format(len(buf), BUF_SIZE, n_reads)) |
| 1220 | 1233 | break |
| 1221 | 1234 | |
| 1235 | + # try parse | |
| 1236 | + for idx in candidates: | |
| 1237 | + # assume that in stream at idx there is a VBAInfoContainer | |
| 1238 | + stream.seek(idx) | |
| 1239 | + log.info('extracting at idx {}'.format(idx)) | |
| 1240 | + try: | |
| 1241 | + container = VBAInfoContainer.extract_from(stream) | |
| 1242 | + except Exception: | |
| 1243 | + self._log_exception() | |
| 1244 | + continue | |
| 1245 | + | |
| 1246 | + errs = container.check_validity() | |
| 1247 | + if errs: | |
| 1248 | + log.warning('check_validity found {} issues'.format(len(errs))) | |
| 1249 | + else: | |
| 1250 | + log.info('container is ok') | |
| 1251 | + atom = container.vba_info_atom | |
| 1252 | + log.info('persist id ref is {}, has_macros {}, version {}' | |
| 1253 | + .format(atom.persist_id_ref, atom.f_has_macros, | |
| 1254 | + atom.version)) | |
| 1255 | + for err in errs: | |
| 1256 | + log.warning('check_validity(VBAInfoContainer): {}' | |
| 1257 | + .format(err)) | |
| 1258 | + if errs and self.fast_fail: | |
| 1259 | + raise errs[0] | |
| 1260 | + | |
| 1222 | 1261 | finally: |
| 1223 | 1262 | if stream is not None: |
| 1224 | 1263 | log.debug('closing stream') | ... | ... |