Commit 05a27a43643562e52dbc3710e7e3dd044adf872f
1 parent
8ee20161
managed to extract vba stream from ppt by byte-search for ExternalObjectStorage
Qapla'
Showing
1 changed file
with
300 additions
and
43 deletions
oletools/ppt_parser.py
| @@ -19,7 +19,8 @@ References: | @@ -19,7 +19,8 @@ References: | ||
| 19 | # - make stream optional in PptUnexpectedData | 19 | # - make stream optional in PptUnexpectedData |
| 20 | # - can speed-up by using less bigger struct.parse calls? | 20 | # - can speed-up by using less bigger struct.parse calls? |
| 21 | # - license | 21 | # - license |
| 22 | -# - create a AtomBase class that defines check_value and parses RecordHead? | 22 | +# - make buffered stream from output of iterative_decompress |
| 23 | +# - less stream open/close, possibly through decorator for open+closing? | ||
| 23 | # | 24 | # |
| 24 | # CHANGELOG: | 25 | # CHANGELOG: |
| 25 | # 2016-05-04 v0.01 CH: - start parsing "Current User" stream | 26 | # 2016-05-04 v0.01 CH: - start parsing "Current User" stream |
| @@ -33,9 +34,11 @@ import logging | @@ -33,9 +34,11 @@ import logging | ||
| 33 | import struct | 34 | import struct |
| 34 | import traceback | 35 | import traceback |
| 35 | import os | 36 | import os |
| 37 | +import cStringIO | ||
| 36 | 38 | ||
| 37 | import thirdparty.olefile as olefile | 39 | import thirdparty.olefile as olefile |
| 38 | from olevba import get_logger | 40 | from olevba import get_logger |
| 41 | +import zlib | ||
| 39 | 42 | ||
| 40 | 43 | ||
| 41 | # a global logger object used for debugging: | 44 | # a global logger object used for debugging: |
| @@ -132,6 +135,8 @@ class RecordHeader(object): | @@ -132,6 +135,8 @@ class RecordHeader(object): | ||
| 132 | 135 | ||
| 133 | length of result depends on rec_len being given or not | 136 | length of result depends on rec_len being given or not |
| 134 | """ | 137 | """ |
| 138 | + if rec_type is None: | ||
| 139 | + raise ValueError('RECORD_TYPE not set!') | ||
| 135 | version_instance = rec_ver + 2**4 * rec_instance | 140 | version_instance = rec_ver + 2**4 * rec_instance |
| 136 | if rec_len is None: | 141 | if rec_len is None: |
| 137 | return struct.pack('<HH', version_instance, rec_type) | 142 | return struct.pack('<HH', version_instance, rec_type) |
| @@ -161,7 +166,12 @@ class PptType(object): | @@ -161,7 +166,12 @@ class PptType(object): | ||
| 161 | self.rec_head = RecordHeader.extract_from(stream) | 166 | self.rec_head = RecordHeader.extract_from(stream) |
| 162 | 167 | ||
| 163 | def check_validity(self): | 168 | def check_validity(self): |
| 164 | - """ to be overwritten in subclasses | 169 | + """ check validity of data |
| 170 | + | ||
| 171 | + replaces 'raise PptUnexpectedData' so caller can get all the errors | ||
| 172 | + (not just the first) whenever she wishes. | ||
| 173 | + | ||
| 174 | + to be overwritten in subclasses | ||
| 165 | 175 | ||
| 166 | :returns: list of PptUnexpectedData | 176 | :returns: list of PptUnexpectedData |
| 167 | """ | 177 | """ |
| @@ -243,6 +253,12 @@ class PptType(object): | @@ -243,6 +253,12 @@ class PptType(object): | ||
| 243 | self.rec_head.rec_len, length)) | 253 | self.rec_head.rec_len, length)) |
| 244 | return errs | 254 | return errs |
| 245 | 255 | ||
| 256 | + @classmethod | ||
| 257 | + def generate_pattern(clz, rec_len=None): | ||
| 258 | + """ call RecordHeader.generate with values for this type """ | ||
| 259 | + return RecordHeader.generate(clz.RECORD_TYPE, rec_len, | ||
| 260 | + clz.RECORD_INSTANCE, clz.RECORD_VERSION) | ||
| 261 | + | ||
| 246 | 262 | ||
| 247 | class CurrentUserAtom(PptType): | 263 | class CurrentUserAtom(PptType): |
| 248 | """ An atom record that specifies information about the last user to modify | 264 | """ An atom record that specifies information about the last user to modify |
| @@ -853,7 +869,7 @@ class VBAInfoContainer(PptType): | @@ -853,7 +869,7 @@ class VBAInfoContainer(PptType): | ||
| 853 | if rec_head is None: | 869 | if rec_head is None: |
| 854 | obj.read_rec_head(stream) | 870 | obj.read_rec_head(stream) |
| 855 | else: | 871 | else: |
| 856 | - log.debug('skip parsing of RecordHead') | 872 | + log.debug('skip parsing of RecordHeader') |
| 857 | obj.rec_head = rec_head | 873 | obj.rec_head = rec_head |
| 858 | obj.vba_info_atom = VBAInfoAtom.extract_from(stream) | 874 | obj.vba_info_atom = VBAInfoAtom.extract_from(stream) |
| 859 | return obj | 875 | return obj |
| @@ -912,6 +928,92 @@ class VBAInfoAtom(PptType): | @@ -912,6 +928,92 @@ class VBAInfoAtom(PptType): | ||
| 912 | errs.extend(self.check_value('version', self.version, 2)) | 928 | errs.extend(self.check_value('version', self.version, 2)) |
| 913 | return errs | 929 | return errs |
| 914 | 930 | ||
| 931 | + | ||
| 932 | +class ExternalObjectStorage(PptType): | ||
| 933 | + """ storage for compressed/uncompressed OLE/VBA/ActiveX control data | ||
| 934 | + | ||
| 935 | + Matches types ExOleObjStgCompressedAtom, ExOleObjStgUncompressedAtom, | ||
| 936 | + VbaProjectStgCompressedAtom, VbaProjectStgUncompressedAtom, | ||
| 937 | + ExControlStgUncompressedAtom, ExControlStgCompressedAtom | ||
| 938 | + | ||
| 939 | + Difference between compressed and uncompressed: RecordHeader.rec_instance | ||
| 940 | + is 0 or 1, first variable after RecordHeader is decompressed_size | ||
| 941 | + | ||
| 942 | + Data is not read at first, only its offset in the stream and size is saved | ||
| 943 | + | ||
| 944 | + e.g. | ||
| 945 | + https://msdn.microsoft.com/en-us/library/dd952169%28v=office.12%29.aspx | ||
| 946 | + """ | ||
| 947 | + | ||
| 948 | + RECORD_TYPE = 0x1011 | ||
| 949 | + RECORD_INSTANCE_COMPRESSED = 1 | ||
| 950 | + RECORD_INSTANCE_UNCOMPRESSED = 0 | ||
| 951 | + | ||
| 952 | + def __init__(self, compressed=None): | ||
| 953 | + super(ExternalObjectStorage, self).__init__() | ||
| 954 | + if compressed is None: | ||
| 955 | + self.RECORD_INSTANCE = None # otherwise defaults to 0 | ||
| 956 | + elif compressed: | ||
| 957 | + self.RECORD_INSTANCE = self.RECORD_INSTANCE_COMPRESSED | ||
| 958 | + self.compressed = True | ||
| 959 | + else: | ||
| 960 | + self.RECORD_INSTANCE = self.RECORD_INSTANCE_UNCOMPRESSED | ||
| 961 | + self.compressed = False | ||
| 962 | + self.uncompressed_size = None | ||
| 963 | + self.data_offset = None | ||
| 964 | + self.data_size = None | ||
| 965 | + | ||
| 966 | + def extract_from(self, stream): | ||
| 967 | + """ not a classmethod because of compressed attrib | ||
| 968 | + | ||
| 969 | + see also: DummyType | ||
| 970 | + """ | ||
| 971 | + log.debug('Parsing ExternalObjectStorage (compressed={}) from stream' | ||
| 972 | + .format(self.compressed)) | ||
| 973 | + self.read_rec_head(stream) | ||
| 974 | + self.data_size = self.rec_head.rec_len | ||
| 975 | + if self.compressed: | ||
| 976 | + log.debug('is compressed --> reduce size') | ||
| 977 | + self.uncompressed_size = read_4(stream) | ||
| 978 | + self.data_size -= 4 | ||
| 979 | + self.data_offset = stream.tell() | ||
| 980 | + | ||
| 981 | + def check_validity(self): | ||
| 982 | + return self.check_rec_head() | ||
| 983 | + | ||
| 984 | + | ||
| 985 | +class ExternalObjectStorageUncompressed(ExternalObjectStorage): | ||
| 986 | + """ subclass of ExternalObjectStorage for uncompressed objects """ | ||
| 987 | + RECORD_INSTANCE = ExternalObjectStorage.RECORD_INSTANCE_UNCOMPRESSED | ||
| 988 | + | ||
| 989 | + def __init__(self): | ||
| 990 | + super(ExternalObjectStorageUncompressed, self).__init__(False) | ||
| 991 | + | ||
| 992 | + @classmethod | ||
| 993 | + def extract_from(clz, stream): | ||
| 994 | + """ note the usage of super here: call instance method of super class! | ||
| 995 | + """ | ||
| 996 | + obj = clz() | ||
| 997 | + super(ExternalObjectStorageUncompressed, obj).extract_from(stream) | ||
| 998 | + return obj | ||
| 999 | + | ||
| 1000 | + | ||
| 1001 | +class ExternalObjectStorageCompressed(ExternalObjectStorage): | ||
| 1002 | + """ subclass of ExternalObjectStorage for compressed objects """ | ||
| 1003 | + RECORD_INSTANCE = ExternalObjectStorage.RECORD_INSTANCE_COMPRESSED | ||
| 1004 | + | ||
| 1005 | + def __init__(self): | ||
| 1006 | + super(ExternalObjectStorageCompressed, self).__init__(True) | ||
| 1007 | + | ||
| 1008 | + @classmethod | ||
| 1009 | + def extract_from(clz, stream): | ||
| 1010 | + """ note the usage of super here: call instance method of super class! | ||
| 1011 | + """ | ||
| 1012 | + obj = clz() | ||
| 1013 | + super(ExternalObjectStorageCompressed, obj).extract_from(stream) | ||
| 1014 | + return obj | ||
| 1015 | + | ||
| 1016 | + | ||
| 915 | # === PptParser =============================================================== | 1017 | # === PptParser =============================================================== |
| 916 | 1018 | ||
| 917 | 1019 | ||
| @@ -1180,59 +1282,65 @@ class PptParser(object): | @@ -1180,59 +1282,65 @@ class PptParser(object): | ||
| 1180 | if errs and self.fast_fail: | 1282 | if errs and self.fast_fail: |
| 1181 | raise errs[0] | 1283 | raise errs[0] |
| 1182 | 1284 | ||
| 1183 | - def search_vba(self): | ||
| 1184 | - """ quick-and-dirty: do not parse everything, just look for right bytes | ||
| 1185 | - | ||
| 1186 | - "quick" here means quick to program. Runtime now is linear is document | ||
| 1187 | - size (--> for big documents the other method might be faster) | ||
| 1188 | - """ | 1285 | + def search_pattern(self, pattern, stream): |
| 1286 | + """ search for pattern in stream, return indices """ | ||
| 1189 | 1287 | ||
| 1190 | BUF_SIZE = 1024 | 1288 | BUF_SIZE = 1024 |
| 1191 | 1289 | ||
| 1192 | - pattern = RecordHeader.generate( | ||
| 1193 | - VBAInfoContainer.RECORD_TYPE, | ||
| 1194 | - rec_len=VBAInfoContainer.RECORD_LENGTH, | ||
| 1195 | - rec_instance=VBAInfoContainer.RECORD_INSTANCE, | ||
| 1196 | - rec_ver=VBAInfoContainer.RECORD_VERSION) \ | ||
| 1197 | - + RecordHeader.generate( | ||
| 1198 | - VBAInfoAtom.RECORD_TYPE, | ||
| 1199 | - rec_len=VBAInfoAtom.RECORD_LENGTH, | ||
| 1200 | - rec_instance=VBAInfoAtom.RECORD_INSTANCE, | ||
| 1201 | - rec_ver=VBAInfoAtom.RECORD_VERSION) | ||
| 1202 | pattern_len = len(pattern) | 1290 | pattern_len = len(pattern) |
| 1203 | log.debug('pattern length is {}'.format(pattern_len)) | 1291 | log.debug('pattern length is {}'.format(pattern_len)) |
| 1204 | if pattern_len > BUF_SIZE: | 1292 | if pattern_len > BUF_SIZE: |
| 1205 | raise ValueError('need buf > pattern to search!') | 1293 | raise ValueError('need buf > pattern to search!') |
| 1206 | 1294 | ||
| 1295 | + n_reads = 0 | ||
| 1296 | + candidates = [] | ||
| 1297 | + while True: | ||
| 1298 | + start_pos = stream.tell() | ||
| 1299 | + n_reads += 1 | ||
| 1300 | + #log.debug('read {} starting from {}' | ||
| 1301 | + # .format(BUF_SIZE, start_pos)) | ||
| 1302 | + buf = stream.read(BUF_SIZE) | ||
| 1303 | + idx = buf.find(pattern) | ||
| 1304 | + while idx != -1: | ||
| 1305 | + log.info('found pattern at index {}'.format(start_pos+idx)) | ||
| 1306 | + candidates.append(start_pos+idx) | ||
| 1307 | + idx = buf.find(pattern, idx+1) | ||
| 1308 | + | ||
| 1309 | + if len(buf) == BUF_SIZE: | ||
| 1310 | + # move back a bit to avoid splitting of pattern through buf | ||
| 1311 | + stream.seek(-1 * pattern_len, os.SEEK_CUR) | ||
| 1312 | + else: | ||
| 1313 | + log.debug('reached end of buf (read {}<{}) after {} reads' | ||
| 1314 | + .format(len(buf), BUF_SIZE, n_reads)) | ||
| 1315 | + break | ||
| 1316 | + return candidates | ||
| 1317 | + | ||
| 1318 | + | ||
| 1319 | + def search_vba_info(self): | ||
| 1320 | + """ search through stream for VBAInfoContainer, alternative to parse... | ||
| 1321 | + | ||
| 1322 | + quick-and-dirty: do not parse everything, just look for right bytes | ||
| 1323 | + | ||
| 1324 | + "quick" here means quick to program. Runtime now is linear in document | ||
| 1325 | + size (--> for big documents the other method might be faster) | ||
| 1326 | + | ||
| 1327 | + .. seealso:: search_vba_storage | ||
| 1328 | + """ | ||
| 1329 | + | ||
| 1330 | + pattern = VBAInfoContainer.generate_pattern( | ||
| 1331 | + rec_len=VBAInfoContainer.RECORD_LENGTH) \ | ||
| 1332 | + + VBAInfoAtom.generate_pattern( | ||
| 1333 | + rec_len=VBAInfoAtom.RECORD_LENGTH) | ||
| 1207 | stream = None | 1334 | stream = None |
| 1208 | try: | 1335 | try: |
| 1209 | log.debug('opening stream') | 1336 | log.debug('opening stream') |
| 1210 | stream = self.ole.openstream(MAIN_STREAM_NAME) | 1337 | stream = self.ole.openstream(MAIN_STREAM_NAME) |
| 1211 | 1338 | ||
| 1212 | # look for candidate positions | 1339 | # look for candidate positions |
| 1213 | - n_reads = 0 | ||
| 1214 | - candidates = [] | ||
| 1215 | - while True: | ||
| 1216 | - start_pos = stream.tell() | ||
| 1217 | - n_reads += 1 | ||
| 1218 | - #log.debug('read {} starting from {}' | ||
| 1219 | - # .format(BUF_SIZE, start_pos)) | ||
| 1220 | - buf = stream.read(BUF_SIZE) | ||
| 1221 | - idx = buf.find(pattern) | ||
| 1222 | - while idx != -1: | ||
| 1223 | - log.info('found pattern at index {}'.format(start_pos+idx)) | ||
| 1224 | - candidates.append(start_pos+idx) | ||
| 1225 | - idx = buf.find(pattern, idx+1) | ||
| 1226 | - | ||
| 1227 | - if len(buf) == BUF_SIZE: | ||
| 1228 | - # move back a bit to avoid splitting of pattern through buf | ||
| 1229 | - stream.seek(-1 * pattern_len, os.SEEK_CUR) | ||
| 1230 | - else: | ||
| 1231 | - log.debug('reached end of buf (read {}<{}) after {} reads' | ||
| 1232 | - .format(len(buf), BUF_SIZE, n_reads)) | ||
| 1233 | - break | 1340 | + candidates = self.search_pattern(pattern, stream) |
| 1234 | 1341 | ||
| 1235 | # try parse | 1342 | # try parse |
| 1343 | + containers = [] | ||
| 1236 | for idx in candidates: | 1344 | for idx in candidates: |
| 1237 | # assume that in stream at idx there is a VBAInfoContainer | 1345 | # assume that in stream at idx there is a VBAInfoContainer |
| 1238 | stream.seek(idx) | 1346 | stream.seek(idx) |
| @@ -1252,23 +1360,149 @@ class PptParser(object): | @@ -1252,23 +1360,149 @@ class PptParser(object): | ||
| 1252 | log.info('persist id ref is {}, has_macros {}, version {}' | 1360 | log.info('persist id ref is {}, has_macros {}, version {}' |
| 1253 | .format(atom.persist_id_ref, atom.f_has_macros, | 1361 | .format(atom.persist_id_ref, atom.f_has_macros, |
| 1254 | atom.version)) | 1362 | atom.version)) |
| 1363 | + containers.append(container) | ||
| 1255 | for err in errs: | 1364 | for err in errs: |
| 1256 | log.warning('check_validity(VBAInfoContainer): {}' | 1365 | log.warning('check_validity(VBAInfoContainer): {}' |
| 1257 | .format(err)) | 1366 | .format(err)) |
| 1258 | if errs and self.fast_fail: | 1367 | if errs and self.fast_fail: |
| 1259 | raise errs[0] | 1368 | raise errs[0] |
| 1260 | 1369 | ||
| 1370 | + return containers | ||
| 1371 | + | ||
| 1261 | finally: | 1372 | finally: |
| 1262 | if stream is not None: | 1373 | if stream is not None: |
| 1263 | log.debug('closing stream') | 1374 | log.debug('closing stream') |
| 1264 | stream.close() | 1375 | stream.close() |
| 1265 | 1376 | ||
| 1377 | + def search_vba_storage(self): | ||
| 1378 | + """ search through stream for VBAProjectStg, alternative to parse... | ||
| 1379 | + | ||
| 1380 | + quick-and-dirty: do not parse everything, just look for right bytes | ||
| 1381 | + | ||
| 1382 | + "quick" here means quick to program. Runtime now is linear in document | ||
| 1383 | + size (--> for big documents the other method might be faster) | ||
| 1384 | + | ||
| 1385 | + The storages found could also contain (instead of VBA data): ActiveX | ||
| 1386 | + data or general OLE data | ||
| 1387 | + | ||
| 1388 | + .. seealso:: :py:meth:`search_vba_info` | ||
| 1389 | + """ | ||
| 1390 | + | ||
| 1391 | + stream = None | ||
| 1392 | + try: | ||
| 1393 | + log.debug('opening stream') | ||
| 1394 | + stream = self.ole.openstream(MAIN_STREAM_NAME) | ||
| 1395 | + | ||
| 1396 | + storages = [] | ||
| 1397 | + for obj_type in (ExternalObjectStorageUncompressed, | ||
| 1398 | + ExternalObjectStorageCompressed): | ||
| 1399 | + # re-position stream at start | ||
| 1400 | + stream.seek(0, os.SEEK_SET) | ||
| 1401 | + | ||
| 1402 | + # look for candidate positions | ||
| 1403 | + pattern = obj_type.generate_pattern() | ||
| 1404 | + candidates = self.search_pattern(pattern, stream) | ||
| 1405 | + | ||
| 1406 | + # try parse | ||
| 1407 | + for idx in candidates: | ||
| 1408 | + # assume a ExternalObjectStorage in stream at idx | ||
| 1409 | + stream.seek(idx) | ||
| 1410 | + log.info('extracting at idx {}'.format(idx)) | ||
| 1411 | + try: | ||
| 1412 | + storage = obj_type.extract_from(stream) | ||
| 1413 | + except Exception: | ||
| 1414 | + self._log_exception() | ||
| 1415 | + continue | ||
| 1416 | + | ||
| 1417 | + errs = storage.check_validity() | ||
| 1418 | + if errs: | ||
| 1419 | + log.warning('check_validity found {} issues' | ||
| 1420 | + .format(len(errs))) | ||
| 1421 | + else: | ||
| 1422 | + log.info('storage is ok; compressed={}, size={}, ' | ||
| 1423 | + 'size_decomp={}' | ||
| 1424 | + .format(storage.compressed, | ||
| 1425 | + storage.rec_head.rec_len, | ||
| 1426 | + storage.uncompressed_size)) | ||
| 1427 | + storages.append(storage) | ||
| 1428 | + for err in errs: | ||
| 1429 | + log.warning('check_validity({}): {}' | ||
| 1430 | + .format(obj_type.__name__, err)) | ||
| 1431 | + if errs and self.fast_fail: | ||
| 1432 | + raise errs[0] | ||
| 1433 | + | ||
| 1434 | + return storages | ||
| 1435 | + | ||
| 1436 | + finally: | ||
| 1437 | + if stream is not None: | ||
| 1438 | + log.debug('closing stream') | ||
| 1439 | + stream.close() | ||
| 1440 | + | ||
| 1441 | + | ||
| 1442 | + def decompress_vba_storage(self, storage): | ||
| 1443 | + """ return decompressed data from search_vba_storage """ | ||
| 1444 | + | ||
| 1445 | + log.debug('decompressing storage for VBA OLE data stream ') | ||
| 1446 | + stream = None | ||
| 1447 | + try: | ||
| 1448 | + log.debug('opening stream') | ||
| 1449 | + stream = self.ole.openstream(MAIN_STREAM_NAME) | ||
| 1450 | + | ||
| 1451 | + # decompress iteratively; a zlib.decompress of all data | ||
| 1452 | + # failed with Error -5 (incomplete or truncated stream) | ||
| 1453 | + stream.seek(storage.data_offset, os.SEEK_SET) | ||
| 1454 | + decomp, n_read, err = \ | ||
| 1455 | + iterative_decompress(stream, storage.data_size) | ||
| 1456 | + log.info('decompressed {} to {} bytes, err is {}' | ||
| 1457 | + .format(n_read, len(decomp), err)) | ||
| 1458 | + if err and self.fast_fail: | ||
| 1459 | + raise err | ||
| 1460 | + # otherwise try to continue with partial data | ||
| 1461 | + | ||
| 1462 | + return decomp | ||
| 1463 | + | ||
| 1464 | + ## create OleFileIO from decompressed data | ||
| 1465 | + #ole = olefile.OleFileIO(decomp) | ||
| 1466 | + #root_streams = [entry[0].lower() for entry in ole.listdir()] | ||
| 1467 | + #for required in 'project', 'projectwm', 'vba': | ||
| 1468 | + # if required not in root_streams: | ||
| 1469 | + # raise ValueError('storage seems to not be a VBA storage ' | ||
| 1470 | + # '({} not found in root streams)' | ||
| 1471 | + # .format(required)) | ||
| 1472 | + #log.debug('tests succeeded') | ||
| 1473 | + #return ole | ||
| 1474 | + | ||
| 1475 | + finally: | ||
| 1476 | + if stream is not None: | ||
| 1477 | + log.debug('closing stream') | ||
| 1478 | + stream.close() | ||
| 1479 | + | ||
| 1480 | + | ||
| 1481 | +def iterative_decompress(stream, size, chunk_size=4096): | ||
| 1482 | + """ decompress data from stream chunk-wise """ | ||
| 1483 | + | ||
| 1484 | + decompressor = zlib.decompressobj() | ||
| 1485 | + n_read = 0 | ||
| 1486 | + decomp = '' | ||
| 1487 | + return_err = None | ||
| 1488 | + | ||
| 1489 | + try: | ||
| 1490 | + while n_read < size: | ||
| 1491 | + n_new = min(size-n_read, chunk_size) | ||
| 1492 | + decomp += decompressor.decompress(stream.read(n_new)) | ||
| 1493 | + n_read += n_new | ||
| 1494 | + except zlib.error as err: | ||
| 1495 | + return_err = err | ||
| 1496 | + | ||
| 1497 | + return decomp, n_read, return_err | ||
| 1498 | + | ||
| 1266 | # === TESTING ================================================================= | 1499 | # === TESTING ================================================================= |
| 1267 | 1500 | ||
| 1268 | def test(): | 1501 | def test(): |
| 1269 | """ for testing and debugging """ | 1502 | """ for testing and debugging """ |
| 1270 | 1503 | ||
| 1271 | from glob import glob | 1504 | from glob import glob |
| 1505 | + from olevba import VBA_Parser | ||
| 1272 | 1506 | ||
| 1273 | # setup logging | 1507 | # setup logging |
| 1274 | logging.basicConfig(level=logging.DEBUG, | 1508 | logging.basicConfig(level=logging.DEBUG, |
| @@ -1280,9 +1514,32 @@ def test(): | @@ -1280,9 +1514,32 @@ def test(): | ||
| 1280 | # parse | 1514 | # parse |
| 1281 | log.info('-' * 72) | 1515 | log.info('-' * 72) |
| 1282 | log.info('test file: {}'.format(file_name)) | 1516 | log.info('test file: {}'.format(file_name)) |
| 1283 | - ppt = PptParser(file_name, fast_fail=False) | ||
| 1284 | - #ppt.parse_document_persist_object() | ||
| 1285 | - ppt.search_vba() | 1517 | + try: |
| 1518 | + ppt = PptParser(file_name, fast_fail=False) | ||
| 1519 | + #ppt.parse_document_persist_object() | ||
| 1520 | + n_infos = len(ppt.search_vba_info()) | ||
| 1521 | + storages = ppt.search_vba_storage() | ||
| 1522 | + n_storages = len(storages) | ||
| 1523 | + log.debug('found {} infos and {} storages'.format(n_infos, | ||
| 1524 | + n_storages)) | ||
| 1525 | + if n_infos != n_storages: | ||
| 1526 | + log.warning('found different number of vba infos and storages') | ||
| 1527 | + for storage in storages: | ||
| 1528 | + parser = VBA_Parser(None, ppt.decompress_vba_storage(storage), | ||
| 1529 | + container=file_name) | ||
| 1530 | + for vba_root, project_path, dir_path in parser.find_vba_projects(): | ||
| 1531 | + log.info('found vba project: root={}, proj={}, dir={}' | ||
| 1532 | + .format(vba_root, project_path, dir_path)) | ||
| 1533 | + for subfilename, stream_path, vba_filename, vba_code in \ | ||
| 1534 | + parser.extract_all_macros(): | ||
| 1535 | + log.info('found macro: subfile={}, stream={}, vbafile={}' | ||
| 1536 | + .format(subfilename, stream_path, vba_filename)) | ||
| 1537 | + for line in vba_code.splitlines(): | ||
| 1538 | + log.info('code: {}'.format(line.rstrip())) | ||
| 1539 | + | ||
| 1540 | + | ||
| 1541 | + except Exception: | ||
| 1542 | + log.exception('exception') | ||
| 1286 | 1543 | ||
| 1287 | 1544 | ||
| 1288 | if __name__ == '__main__': | 1545 | if __name__ == '__main__': |