Commit 0a8eace56e838800847f1c540467981b37a8f07a
1 parent
27e0a1c8
start looking for vba a different way: search for record header of VBAInfoAtom/Container
Showing
1 changed file
with
64 additions
and
1 deletion
oletools/ppt_parser.py
| @@ -126,6 +126,18 @@ class RecordHeader(object): | @@ -126,6 +126,18 @@ class RecordHeader(object): | ||
| 126 | obj.rec_len)) | 126 | obj.rec_len)) |
| 127 | return obj | 127 | return obj |
| 128 | 128 | ||
@classmethod
def generate(cls, rec_type, rec_len=None, rec_instance=0, rec_ver=0):
    """ generate a record header string given values

    The header is packed little-endian as: one 16-bit word holding
    rec_ver in the low 4 bits and rec_instance in the bits above it,
    followed by the 16-bit rec_type and, optionally, the 32-bit rec_len.

    :param rec_type: record type, must fit into 16 bits
    :param rec_len: record length (32 bits); if None, the length field is
                    left out of the result
    :param rec_instance: record instance, must fit into 12 bits
    :param rec_ver: record version, must be in range 0..0xF
    :returns: packed header; 4 bytes if rec_len is None, 8 bytes otherwise
    :raises ValueError: if rec_ver does not fit into 4 bits
    :raises struct.error: if another field is out of range for its slot
    """
    # A version wider than 4 bits would silently spill into the instance
    # bits and produce a header for a different record, so reject it here.
    # (Out-of-range instances are caught by the 'H' range check of
    # struct.pack once the version is known to be sane.)
    if not 0 <= rec_ver <= 0xF:
        raise ValueError(
            'rec_ver must be in range 0..15, got {0!r}'.format(rec_ver))

    version_instance = rec_ver | (rec_instance << 4)
    if rec_len is None:
        return struct.pack('<HH', version_instance, rec_type)
    return struct.pack('<HHL', version_instance, rec_type, rec_len)
| 140 | + | ||
| 129 | 141 | ||
| 130 | class PptType(object): | 142 | class PptType(object): |
| 131 | """ base class of data types found in ppt ole files | 143 | """ base class of data types found in ppt ole files |
| @@ -1162,6 +1174,56 @@ class PptParser(object): | @@ -1162,6 +1174,56 @@ class PptParser(object): | ||
| 1162 | if errs and self.fast_fail: | 1174 | if errs and self.fast_fail: |
| 1163 | raise errs[0] | 1175 | raise errs[0] |
| 1164 | 1176 | ||
def search_vba(self):
    """ quick-and-dirty: do not parse everything, just look for right bytes

    Scans the main stream for the byte sequence formed by a
    VBAInfoContainer record header (rec_len 0x14) immediately followed by
    a VBAInfoAtom record header (rec_len 0xC). Every hit is logged at
    info level and collected.

    "quick" here means quick to program. Runtime is linear in document
    size (--> for big documents the other method might be faster)

    :returns: list of stream offsets at which the pattern starts (empty
              if there is no match)
    :raises ValueError: if the pattern is longer than the read buffer
    """

    BUF_SIZE = 1024

    pattern = RecordHeader.generate(
        VBAInfoContainer.RECORD_TYPE, rec_len=0x14,
        rec_instance=VBAInfoContainer.RECORD_INSTANCE,
        rec_ver=VBAInfoContainer.RECORD_VERSION) \
        + RecordHeader.generate(
            VBAInfoAtom.RECORD_TYPE, rec_len=0xC,
            rec_instance=VBAInfoAtom.RECORD_INSTANCE,
            rec_ver=VBAInfoAtom.RECORD_VERSION)
    pattern_len = len(pattern)
    log.debug('pattern length is {}'.format(pattern_len))
    if pattern_len > BUF_SIZE:
        raise ValueError('need buf > pattern to search!')

    # Consecutive reads overlap so that a match straddling a buffer
    # boundary is not missed. The overlap must be pattern_len - 1: with a
    # full pattern_len a match sitting at the very end of one buffer would
    # show up again at index 0 of the next one and be reported twice.
    overlap = pattern_len - 1

    offsets = []
    stream = None
    try:
        log.debug('opening stream')
        stream = self.ole.openstream(MAIN_STREAM_NAME)
        n_reads = 0
        while True:
            start_pos = stream.tell()
            n_reads += 1
            buf = stream.read(BUF_SIZE)

            # report all occurrences within this buffer
            idx = buf.find(pattern)
            while idx != -1:
                log.info('found pattern at index {}'.format(start_pos+idx))
                offsets.append(start_pos + idx)
                idx = buf.find(pattern, idx+1)

            # a short read means the end of the stream has been reached
            if len(buf) < BUF_SIZE:
                log.debug('reached end of buf (read {}<{}) after {} reads'
                          .format(len(buf), BUF_SIZE, n_reads))
                break

            # step back so the next buffer starts with the tail of this one
            stream.seek(-overlap, os.SEEK_CUR)

    finally:
        if stream is not None:
            log.debug('closing stream')
            stream.close()

    return offsets
| 1226 | + | ||
| 1165 | # === TESTING ================================================================= | 1227 | # === TESTING ================================================================= |
| 1166 | 1228 | ||
| 1167 | def test(): | 1229 | def test(): |
| @@ -1180,7 +1242,8 @@ def test(): | @@ -1180,7 +1242,8 @@ def test(): | ||
| 1180 | log.info('-' * 72) | 1242 | log.info('-' * 72) |
| 1181 | log.info('test file: {}'.format(file_name)) | 1243 | log.info('test file: {}'.format(file_name)) |
| 1182 | ppt = PptParser(file_name, fast_fail=False) | 1244 | ppt = PptParser(file_name, fast_fail=False) |
| 1183 | - ppt.parse_document_persist_object() | 1245 | + #ppt.parse_document_persist_object() |
| 1246 | + ppt.search_vba() | ||
| 1184 | 1247 | ||
| 1185 | 1248 | ||
| 1186 | if __name__ == '__main__': | 1249 | if __name__ == '__main__': |