Commit a60d9f3e64f95cc7f6d374df9fcefa154166d624
1 parent
42fbc3ee
tweaked logging (lots of info-->debug, some removed; added some comments)
Showing
2 changed files
with
40 additions
and
28 deletions
oletools/olevba.py
| ... | ... | @@ -2198,34 +2198,42 @@ class VBA_Parser(object): |
| 2198 | 2198 | ole_subfiles (except find_vba_* which needs to explicitly check for |
| 2199 | 2199 | self.type) |
| 2200 | 2200 | """ |
| 2201 | + | |
| 2201 | 2202 | log.info('Check whether OLE file is PPT') |
| 2202 | 2203 | ppt_parser.enable_logging() |
| 2203 | 2204 | try: |
| 2204 | 2205 | ppt = ppt_parser.PptParser(self.ole_file, fast_fail=True) |
| 2206 | + info_container = ppt.search_vba_info() | |
| 2207 | + n_infos = len(info_container) | |
| 2208 | + n_macros = sum(1 for info in info_container | |
| 2209 | + if info.vba_info_atom.f_has_macros > 0) | |
| 2205 | 2210 | n_infos = len(ppt.search_vba_info()) |
| 2211 | + # TODO: does it make sense at all to continue if n_macros == 0? | |
| 2212 | + # --> no vba-info, so all storages probably ActiveX or other OLE | |
| 2206 | 2213 | storages = ppt.search_vba_storage() |
| 2207 | 2214 | n_storages = len(storages) |
| 2208 | - log.debug('ppt: found {} infos and {} storages'.format(n_infos, | |
| 2209 | - n_storages)) | |
| 2210 | - if n_infos != n_storages: | |
| 2211 | - # probably, some storages are ActiveX or other OLE types | |
| 2212 | - log.warning('ppt: found different number of vba infos ({} and ' | |
| 2213 | - 'storages ({}) --> subfiles might make trouble' | |
| 2214 | - .format(n_infos, n_storages)) | |
| 2215 | + n_compressed = 0 | |
| 2215 | 2216 | for storage in storages: |
| 2216 | 2217 | if storage.is_compressed: |
| 2217 | 2218 | storage_decomp = ppt.decompress_vba_storage(storage) |
| 2219 | + n_compressed += 1 | |
| 2218 | 2220 | else: |
| 2219 | 2221 | log.warning('just guessing here: decompressed storage = storage?') |
| 2220 | 2222 | storage_decomp = storage.read_all() # not implemented yet |
| 2221 | 2223 | self.ole_subfiles.append(VBA_Parser(None, storage_decomp, |
| 2222 | 2224 | container='PptParser')) |
| 2225 | + log.info('File is PPT with {} vba infos ({} with macros) and {} ' | |
| 2226 | + 'vba storages ({} compressed)' | |
| 2227 | + .format(n_infos, n_macros, n_storages, n_compressed)) | |
| 2223 | 2228 | self.ole_file.close() # just in case |
| 2224 | 2229 | self.ole_file = None # required to make other methods look at ole_subfiles |
| 2225 | 2230 | self.type = TYPE_PPT |
| 2226 | 2231 | except Exception as exc: |
| 2227 | - log.debug("File appears not to be a ppt file (%s)") | |
| 2228 | - log.debug('Exception from opening attempt:', exc_info=True) | |
| 2232 | + if self.container == 'PptParser': | |
| 2233 | + # this is a subfile of a ppt --> expected that it is not a ppt | |
| 2234 | + log.debug('PPT subfile is not a PPT file') | |
| 2235 | + else: | |
| 2236 | + log.debug("File appears not to be a ppt file (%s)" % exc) | |
| 2229 | 2237 | |
| 2230 | 2238 | |
| 2231 | 2239 | def open_text(self, data): | ... | ... |
oletools/ppt_parser.py
| ... | ... | @@ -2,7 +2,8 @@ |
| 2 | 2 | |
| 3 | 3 | Based on olefile, parse the ppt-specific info |
| 4 | 4 | |
| 5 | -Code much influenced by olevba._extract_vba | |
| 5 | +Code much influenced by olevba._extract_vba but much more object-oriented | |
| 6 | +(possibly slightly excessively so) | |
| 6 | 7 | |
| 7 | 8 | Currently quite narrowly focused on extracting VBA from ppt files, no slides or |
| 8 | 9 | stuff, but built to be extended to parsing more/all of the file |
| ... | ... | @@ -125,16 +126,16 @@ class RecordHeader(object): |
| 125 | 126 | @classmethod |
| 126 | 127 | def extract_from(clz, stream): |
| 127 | 128 | """ reads 8 byte from stream """ |
| 128 | - log.debug('parsing RecordHeader from stream') | |
| 129 | + #log.debug('parsing RecordHeader from stream') | |
| 129 | 130 | obj = clz() |
| 130 | 131 | # first half byte is version, next 3 half bytes are instance |
| 131 | 132 | version_instance, = struct.unpack('<H', stream.read(2)) |
| 132 | 133 | obj.rec_instance, obj.rec_ver = divmod(version_instance, 2**4) |
| 133 | 134 | obj.rec_type, = struct.unpack('<H', stream.read(2)) |
| 134 | 135 | obj.rec_len, = struct.unpack('<L', stream.read(4)) |
| 135 | - log.debug('type is {0:04X}, instance {1:04X}, version {2:04X}, len {3}' | |
| 136 | - .format(obj.rec_type, obj.rec_instance, obj.rec_ver, | |
| 137 | - obj.rec_len)) | |
| 136 | + #log.debug('type is {0:04X}, instance {1:04X}, version {2:04X}, len {3}' | |
| 137 | + # .format(obj.rec_type, obj.rec_instance, obj.rec_ver, | |
| 138 | + # obj.rec_len)) | |
| 138 | 139 | return obj |
| 139 | 140 | |
| 140 | 141 | @classmethod |
| ... | ... | @@ -1057,8 +1058,8 @@ class PptParser(object): |
| 1057 | 1058 | # ['Current User'], |
| 1058 | 1059 | # ['PowerPoint Document']] |
| 1059 | 1060 | root_streams = self.ole.listdir() |
| 1060 | - for stream in root_streams: | |
| 1061 | - log.debug('found root stream {!r}'.format(stream)) | |
| 1061 | + #for stream in root_streams: | |
| 1062 | + # log.debug('found root stream {!r}'.format(stream)) | |
| 1062 | 1063 | if any(len(stream) != 1 for stream in root_streams): |
| 1063 | 1064 | self._fail('root', 'listdir', root_streams, 'len = 1') |
| 1064 | 1065 | root_streams = [stream[0].lower() for stream in root_streams] |
| ... | ... | @@ -1310,7 +1311,7 @@ class PptParser(object): |
| 1310 | 1311 | buf = stream.read(BUF_SIZE) |
| 1311 | 1312 | idx = buf.find(pattern) |
| 1312 | 1313 | while idx != -1: |
| 1313 | - log.info('found pattern at index {}'.format(start_pos+idx)) | |
| 1314 | + log.debug('found pattern at index {}'.format(start_pos+idx)) | |
| 1314 | 1315 | candidates.append(start_pos+idx) |
| 1315 | 1316 | idx = buf.find(pattern, idx+1) |
| 1316 | 1317 | |
| ... | ... | @@ -1335,6 +1336,8 @@ class PptParser(object): |
| 1335 | 1336 | .. seealso:: search_vba_storage |
| 1336 | 1337 | """ |
| 1337 | 1338 | |
| 1339 | + logging.debug('looking for VBA info containers') | |
| 1340 | + | |
| 1338 | 1341 | pattern = VBAInfoContainer.generate_pattern( |
| 1339 | 1342 | rec_len=VBAInfoContainer.RECORD_LENGTH) \ |
| 1340 | 1343 | + VBAInfoAtom.generate_pattern( |
| ... | ... | @@ -1352,7 +1355,7 @@ class PptParser(object): |
| 1352 | 1355 | for idx in candidates: |
| 1353 | 1356 | # assume that in stream at idx there is a VBAInfoContainer |
| 1354 | 1357 | stream.seek(idx) |
| 1355 | - log.info('extracting at idx {}'.format(idx)) | |
| 1358 | + log.debug('extracting at idx {}'.format(idx)) | |
| 1356 | 1359 | try: |
| 1357 | 1360 | container = VBAInfoContainer.extract_from(stream) |
| 1358 | 1361 | except Exception: |
| ... | ... | @@ -1363,9 +1366,9 @@ class PptParser(object): |
| 1363 | 1366 | if errs: |
| 1364 | 1367 | log.warning('check_validity found {} issues'.format(len(errs))) |
| 1365 | 1368 | else: |
| 1366 | - log.info('container is ok') | |
| 1369 | + log.debug('container is ok') | |
| 1367 | 1370 | atom = container.vba_info_atom |
| 1368 | - log.info('persist id ref is {}, has_macros {}, version {}' | |
| 1371 | + log.debug('persist id ref is {}, has_macros {}, version {}' | |
| 1369 | 1372 | .format(atom.persist_id_ref, atom.f_has_macros, |
| 1370 | 1373 | atom.version)) |
| 1371 | 1374 | containers.append(container) |
| ... | ... | @@ -1396,6 +1399,7 @@ class PptParser(object): |
| 1396 | 1399 | .. seealso:: :py:meth:`search_vba_info` |
| 1397 | 1400 | """ |
| 1398 | 1401 | |
| 1402 | + logging.debug('looking for VBA storage objects') | |
| 1399 | 1403 | stream = None |
| 1400 | 1404 | try: |
| 1401 | 1405 | log.debug('opening stream') |
| ... | ... | @@ -1415,7 +1419,7 @@ class PptParser(object): |
| 1415 | 1419 | for idx in candidates: |
| 1416 | 1420 | # assume a ExternalObjectStorage in stream at idx |
| 1417 | 1421 | stream.seek(idx) |
| 1418 | - log.info('extracting at idx {}'.format(idx)) | |
| 1422 | + log.debug('extracting at idx {}'.format(idx)) | |
| 1419 | 1423 | try: |
| 1420 | 1424 | storage = obj_type.extract_from(stream) |
| 1421 | 1425 | except Exception: |
| ... | ... | @@ -1427,11 +1431,11 @@ class PptParser(object): |
| 1427 | 1431 | log.warning('check_validity found {} issues' |
| 1428 | 1432 | .format(len(errs))) |
| 1429 | 1433 | else: |
| 1430 | - log.info('storage is ok; compressed={}, size={}, ' | |
| 1431 | - 'size_decomp={}' | |
| 1432 | - .format(storage.is_compressed, | |
| 1433 | - storage.rec_head.rec_len, | |
| 1434 | - storage.uncompressed_size)) | |
| 1434 | + log.debug('storage is ok; compressed={}, size={}, ' | |
| 1435 | + 'size_decomp={}' | |
| 1436 | + .format(storage.is_compressed, | |
| 1437 | + storage.rec_head.rec_len, | |
| 1438 | + storage.uncompressed_size)) | |
| 1435 | 1439 | storages.append(storage) |
| 1436 | 1440 | for err in errs: |
| 1437 | 1441 | log.warning('check_validity({}): {}' |
| ... | ... | @@ -1461,8 +1465,8 @@ class PptParser(object): |
| 1461 | 1465 | stream.seek(storage.data_offset, os.SEEK_SET) |
| 1462 | 1466 | decomp, n_read, err = \ |
| 1463 | 1467 | iterative_decompress(stream, storage.data_size) |
| 1464 | - log.info('decompressed {} to {} bytes, err is {}' | |
| 1465 | - .format(n_read, len(decomp), err)) | |
| 1468 | + log.debug('decompressed {} to {} bytes; found err: {}' | |
| 1469 | + .format(n_read, len(decomp), err)) | |
| 1466 | 1470 | if err and self.fast_fail: |
| 1467 | 1471 | raise err |
| 1468 | 1472 | # otherwise try to continue with partial data | ... | ... |