Commit a60d9f3e64f95cc7f6d374df9fcefa154166d624

Authored by Christian Herdtweck
1 parent 42fbc3ee

tweaked logging (lots of info-->debug, some removed; added some comments)

oletools/olevba.py
@@ -2198,34 +2198,42 @@ class VBA_Parser(object): @@ -2198,34 +2198,42 @@ class VBA_Parser(object):
2198 ole_subfiles (except find_vba_* which needs to explicitly check for 2198 ole_subfiles (except find_vba_* which needs to explicitly check for
2199 self.type) 2199 self.type)
2200 """ 2200 """
  2201 +
2201 log.info('Check whether OLE file is PPT') 2202 log.info('Check whether OLE file is PPT')
2202 ppt_parser.enable_logging() 2203 ppt_parser.enable_logging()
2203 try: 2204 try:
2204 ppt = ppt_parser.PptParser(self.ole_file, fast_fail=True) 2205 ppt = ppt_parser.PptParser(self.ole_file, fast_fail=True)
  2206 + info_container = ppt.search_vba_info()
  2207 + n_infos = len(info_container)
  2208 + n_macros = sum(1 for info in info_container
  2209 + if info.vba_info_atom.f_has_macros > 0)
2205 n_infos = len(ppt.search_vba_info()) 2210 n_infos = len(ppt.search_vba_info())
  2211 + # TODO: does it make sense at all to continue if n_macros == 0?
  2212 + # --> no vba-info, so all storages probably ActiveX or other OLE
2206 storages = ppt.search_vba_storage() 2213 storages = ppt.search_vba_storage()
2207 n_storages = len(storages) 2214 n_storages = len(storages)
2208 - log.debug('ppt: found {} infos and {} storages'.format(n_infos,  
2209 - n_storages))  
2210 - if n_infos != n_storages:  
2211 - # probably, some storages are ActiveX or other OLE types  
2212 - log.warning('ppt: found different number of vba infos ({} and '  
2213 - 'storages ({}) --> subfiles might make trouble'  
2214 - .format(n_infos, n_storages)) 2215 + n_compressed = 0
2215 for storage in storages: 2216 for storage in storages:
2216 if storage.is_compressed: 2217 if storage.is_compressed:
2217 storage_decomp = ppt.decompress_vba_storage(storage) 2218 storage_decomp = ppt.decompress_vba_storage(storage)
  2219 + n_compressed += 1
2218 else: 2220 else:
2219 log.warning('just guessing here: decompressed storage = storage?') 2221 log.warning('just guessing here: decompressed storage = storage?')
2220 storage_decomp = storage.read_all() # not implemented yet 2222 storage_decomp = storage.read_all() # not implemented yet
2221 self.ole_subfiles.append(VBA_Parser(None, storage_decomp, 2223 self.ole_subfiles.append(VBA_Parser(None, storage_decomp,
2222 container='PptParser')) 2224 container='PptParser'))
  2225 + log.info('File is PPT with {} vba infos ({} with macros) and {} '
  2226 + 'vba storages ({} compressed)'
  2227 + .format(n_infos, n_macros, n_storages, n_compressed))
2223 self.ole_file.close() # just in case 2228 self.ole_file.close() # just in case
2224 self.ole_file = None # required to make other methods look at ole_subfiles 2229 self.ole_file = None # required to make other methods look at ole_subfiles
2225 self.type = TYPE_PPT 2230 self.type = TYPE_PPT
2226 except Exception as exc: 2231 except Exception as exc:
2227 - log.debug("File appears not to be a ppt file (%s)")  
2228 - log.debug('Exception from opening attempt:', exc_info=True) 2232 + if self.container == 'PptParser':
  2233 + # this is a subfile of a ppt --> to be expected that it is not a ppt
  2234 + log.debug('PPT subfile is not a PPT file')
  2235 + else:
  2236 + log.debug("File appears not to be a ppt file (%s)" % exc)
2229 2237
2230 2238
2231 def open_text(self, data): 2239 def open_text(self, data):
oletools/ppt_parser.py
@@ -2,7 +2,8 @@ @@ -2,7 +2,8 @@
2 2
3 Based on olefile, parse the ppt-specific info 3 Based on olefile, parse the ppt-specific info
4 4
5 -Code much influenced by olevba._extract_vba 5 +Code much influenced by olevba._extract_vba but much more object-oriented
  6 +(possibly slightly excessively so)
6 7
7 Currently quite narrowly focused on extracting VBA from ppt files, no slides or 8 Currently quite narrowly focused on extracting VBA from ppt files, no slides or
8 stuff, but built to be extended to parsing more/all of the file 9 stuff, but built to be extended to parsing more/all of the file
@@ -125,16 +126,16 @@ class RecordHeader(object): @@ -125,16 +126,16 @@ class RecordHeader(object):
125 @classmethod 126 @classmethod
126 def extract_from(clz, stream): 127 def extract_from(clz, stream):
127 """ reads 8 byte from stream """ 128 """ reads 8 byte from stream """
128 - log.debug('parsing RecordHeader from stream') 129 + #log.debug('parsing RecordHeader from stream')
129 obj = clz() 130 obj = clz()
130 # first half byte is version, next 3 half bytes are instance 131 # first half byte is version, next 3 half bytes are instance
131 version_instance, = struct.unpack('<H', stream.read(2)) 132 version_instance, = struct.unpack('<H', stream.read(2))
132 obj.rec_instance, obj.rec_ver = divmod(version_instance, 2**4) 133 obj.rec_instance, obj.rec_ver = divmod(version_instance, 2**4)
133 obj.rec_type, = struct.unpack('<H', stream.read(2)) 134 obj.rec_type, = struct.unpack('<H', stream.read(2))
134 obj.rec_len, = struct.unpack('<L', stream.read(4)) 135 obj.rec_len, = struct.unpack('<L', stream.read(4))
135 - log.debug('type is {0:04X}, instance {1:04X}, version {2:04X}, len {3}'  
136 - .format(obj.rec_type, obj.rec_instance, obj.rec_ver,  
137 - obj.rec_len)) 136 + #log.debug('type is {0:04X}, instance {1:04X}, version {2:04X}, len {3}'
  137 + # .format(obj.rec_type, obj.rec_instance, obj.rec_ver,
  138 + # obj.rec_len))
138 return obj 139 return obj
139 140
140 @classmethod 141 @classmethod
@@ -1057,8 +1058,8 @@ class PptParser(object): @@ -1057,8 +1058,8 @@ class PptParser(object):
1057 # ['Current User'], 1058 # ['Current User'],
1058 # ['PowerPoint Document']] 1059 # ['PowerPoint Document']]
1059 root_streams = self.ole.listdir() 1060 root_streams = self.ole.listdir()
1060 - for stream in root_streams:  
1061 - log.debug('found root stream {!r}'.format(stream)) 1061 + #for stream in root_streams:
  1062 + # log.debug('found root stream {!r}'.format(stream))
1062 if any(len(stream) != 1 for stream in root_streams): 1063 if any(len(stream) != 1 for stream in root_streams):
1063 self._fail('root', 'listdir', root_streams, 'len = 1') 1064 self._fail('root', 'listdir', root_streams, 'len = 1')
1064 root_streams = [stream[0].lower() for stream in root_streams] 1065 root_streams = [stream[0].lower() for stream in root_streams]
@@ -1310,7 +1311,7 @@ class PptParser(object): @@ -1310,7 +1311,7 @@ class PptParser(object):
1310 buf = stream.read(BUF_SIZE) 1311 buf = stream.read(BUF_SIZE)
1311 idx = buf.find(pattern) 1312 idx = buf.find(pattern)
1312 while idx != -1: 1313 while idx != -1:
1313 - log.info('found pattern at index {}'.format(start_pos+idx)) 1314 + log.debug('found pattern at index {}'.format(start_pos+idx))
1314 candidates.append(start_pos+idx) 1315 candidates.append(start_pos+idx)
1315 idx = buf.find(pattern, idx+1) 1316 idx = buf.find(pattern, idx+1)
1316 1317
@@ -1335,6 +1336,8 @@ class PptParser(object): @@ -1335,6 +1336,8 @@ class PptParser(object):
1335 .. seealso:: search_vba_storage 1336 .. seealso:: search_vba_storage
1336 """ 1337 """
1337 1338
  1339 + logging.debug('looking for VBA info containers')
  1340 +
1338 pattern = VBAInfoContainer.generate_pattern( 1341 pattern = VBAInfoContainer.generate_pattern(
1339 rec_len=VBAInfoContainer.RECORD_LENGTH) \ 1342 rec_len=VBAInfoContainer.RECORD_LENGTH) \
1340 + VBAInfoAtom.generate_pattern( 1343 + VBAInfoAtom.generate_pattern(
@@ -1352,7 +1355,7 @@ class PptParser(object): @@ -1352,7 +1355,7 @@ class PptParser(object):
1352 for idx in candidates: 1355 for idx in candidates:
1353 # assume that in stream at idx there is a VBAInfoContainer 1356 # assume that in stream at idx there is a VBAInfoContainer
1354 stream.seek(idx) 1357 stream.seek(idx)
1355 - log.info('extracting at idx {}'.format(idx)) 1358 + log.debug('extracting at idx {}'.format(idx))
1356 try: 1359 try:
1357 container = VBAInfoContainer.extract_from(stream) 1360 container = VBAInfoContainer.extract_from(stream)
1358 except Exception: 1361 except Exception:
@@ -1363,9 +1366,9 @@ class PptParser(object): @@ -1363,9 +1366,9 @@ class PptParser(object):
1363 if errs: 1366 if errs:
1364 log.warning('check_validity found {} issues'.format(len(errs))) 1367 log.warning('check_validity found {} issues'.format(len(errs)))
1365 else: 1368 else:
1366 - log.info('container is ok') 1369 + log.debug('container is ok')
1367 atom = container.vba_info_atom 1370 atom = container.vba_info_atom
1368 - log.info('persist id ref is {}, has_macros {}, version {}' 1371 + log.debug('persist id ref is {}, has_macros {}, version {}'
1369 .format(atom.persist_id_ref, atom.f_has_macros, 1372 .format(atom.persist_id_ref, atom.f_has_macros,
1370 atom.version)) 1373 atom.version))
1371 containers.append(container) 1374 containers.append(container)
@@ -1396,6 +1399,7 @@ class PptParser(object): @@ -1396,6 +1399,7 @@ class PptParser(object):
1396 .. seealso:: :py:meth:`search_vba_info` 1399 .. seealso:: :py:meth:`search_vba_info`
1397 """ 1400 """
1398 1401
  1402 + logging.debug('looking for VBA storage objects')
1399 stream = None 1403 stream = None
1400 try: 1404 try:
1401 log.debug('opening stream') 1405 log.debug('opening stream')
@@ -1415,7 +1419,7 @@ class PptParser(object): @@ -1415,7 +1419,7 @@ class PptParser(object):
1415 for idx in candidates: 1419 for idx in candidates:
1416 # assume a ExternalObjectStorage in stream at idx 1420 # assume a ExternalObjectStorage in stream at idx
1417 stream.seek(idx) 1421 stream.seek(idx)
1418 - log.info('extracting at idx {}'.format(idx)) 1422 + log.debug('extracting at idx {}'.format(idx))
1419 try: 1423 try:
1420 storage = obj_type.extract_from(stream) 1424 storage = obj_type.extract_from(stream)
1421 except Exception: 1425 except Exception:
@@ -1427,11 +1431,11 @@ class PptParser(object): @@ -1427,11 +1431,11 @@ class PptParser(object):
1427 log.warning('check_validity found {} issues' 1431 log.warning('check_validity found {} issues'
1428 .format(len(errs))) 1432 .format(len(errs)))
1429 else: 1433 else:
1430 - log.info('storage is ok; compressed={}, size={}, '  
1431 - 'size_decomp={}'  
1432 - .format(storage.is_compressed,  
1433 - storage.rec_head.rec_len,  
1434 - storage.uncompressed_size)) 1434 + log.debug('storage is ok; compressed={}, size={}, '
  1435 + 'size_decomp={}'
  1436 + .format(storage.is_compressed,
  1437 + storage.rec_head.rec_len,
  1438 + storage.uncompressed_size))
1435 storages.append(storage) 1439 storages.append(storage)
1436 for err in errs: 1440 for err in errs:
1437 log.warning('check_validity({}): {}' 1441 log.warning('check_validity({}): {}'
@@ -1461,8 +1465,8 @@ class PptParser(object): @@ -1461,8 +1465,8 @@ class PptParser(object):
1461 stream.seek(storage.data_offset, os.SEEK_SET) 1465 stream.seek(storage.data_offset, os.SEEK_SET)
1462 decomp, n_read, err = \ 1466 decomp, n_read, err = \
1463 iterative_decompress(stream, storage.data_size) 1467 iterative_decompress(stream, storage.data_size)
1464 - log.info('decompressed {} to {} bytes, err is {}'  
1465 - .format(n_read, len(decomp), err)) 1468 + log.debug('decompressed {} to {} bytes; found err: {}'
  1469 + .format(n_read, len(decomp), err))
1466 if err and self.fast_fail: 1470 if err and self.fast_fail:
1467 raise err 1471 raise err
1468 # otherwise try to continue with partial data 1472 # otherwise try to continue with partial data