Commit a60d9f3e64f95cc7f6d374df9fcefa154166d624

Authored by Christian Herdtweck
1 parent 42fbc3ee

tweaked logging (lots of info-->debug, some removed; added some comments

oletools/olevba.py
... ... @@ -2198,34 +2198,42 @@ class VBA_Parser(object):
2198 2198 ole_subfiles (except find_vba_* which needs to explicitly check for
2199 2199 self.type)
2200 2200 """
  2201 +
2201 2202 log.info('Check whether OLE file is PPT')
2202 2203 ppt_parser.enable_logging()
2203 2204 try:
2204 2205 ppt = ppt_parser.PptParser(self.ole_file, fast_fail=True)
  2206 + info_container = ppt.search_vba_info()
  2207 + n_infos = len(info_container)
  2208 + n_macros = sum(1 for info in info_container
  2209 + if info.vba_info_atom.f_has_macros > 0)
2205 2210 n_infos = len(ppt.search_vba_info())
  2211 + # TODO: does it make sense at all to continue if n_macros == 0?
  2212 + # --> no vba-info, so all storages probably ActiveX or other OLE
2206 2213 storages = ppt.search_vba_storage()
2207 2214 n_storages = len(storages)
2208   - log.debug('ppt: found {} infos and {} storages'.format(n_infos,
2209   - n_storages))
2210   - if n_infos != n_storages:
2211   - # probably, some storages are ActiveX or other OLE types
2212   - log.warning('ppt: found different number of vba infos ({} and '
2213   - 'storages ({}) --> subfiles might make trouble'
2214   - .format(n_infos, n_storages))
  2215 + n_compressed = 0
2215 2216 for storage in storages:
2216 2217 if storage.is_compressed:
2217 2218 storage_decomp = ppt.decompress_vba_storage(storage)
  2219 + n_compressed += 1
2218 2220 else:
2219 2221 log.warning('just guessing here: decompressed storage = storage?')
2220 2222 storage_decomp = storage.read_all() # not implemented yet
2221 2223 self.ole_subfiles.append(VBA_Parser(None, storage_decomp,
2222 2224 container='PptParser'))
  2225 + log.info('File is PPT with {} vba infos ({} with macros) and {} '
  2226 + 'vba storages ({} compressed)'
  2227 + .format(n_infos, n_macros, n_storages, n_compressed))
2223 2228 self.ole_file.close() # just in case
2224 2229 self.ole_file = None # required to make other methods look at ole_subfiles
2225 2230 self.type = TYPE_PPT
2226 2231 except Exception as exc:
2227   - log.debug("File appears not to be a ppt file (%s)")
2228   - log.debug('Exception from opening attempt:', exc_info=True)
  2232 + if self.container == 'PptParser':
  2233 + # this is a subfile of a ppt --> to be expected that is no ppt
  2234 + log.debug('PPT subfile is not a PPT file')
  2235 + else:
  2236 + log.debug("File appears not to be a ppt file (%s)" % exc)
2229 2237  
2230 2238  
2231 2239 def open_text(self, data):
... ...
oletools/ppt_parser.py
... ... @@ -2,7 +2,8 @@
2 2  
3 3 Based on olefile, parse the ppt-specific info
4 4  
5   -Code much influenced by olevba._extract_vba
  5 +Code much influenced by olevba._extract_vba but much more object-oriented
  6 +(possibly slightly excessivly so)
6 7  
7 8 Currently quite narrowly focused on extracting VBA from ppt files, no slides or
8 9 stuff, but built to be extended to parsing more/all of the file
... ... @@ -125,16 +126,16 @@ class RecordHeader(object):
125 126 @classmethod
126 127 def extract_from(clz, stream):
127 128 """ reads 8 byte from stream """
128   - log.debug('parsing RecordHeader from stream')
  129 + #log.debug('parsing RecordHeader from stream')
129 130 obj = clz()
130 131 # first half byte is version, next 3 half bytes are instance
131 132 version_instance, = struct.unpack('<H', stream.read(2))
132 133 obj.rec_instance, obj.rec_ver = divmod(version_instance, 2**4)
133 134 obj.rec_type, = struct.unpack('<H', stream.read(2))
134 135 obj.rec_len, = struct.unpack('<L', stream.read(4))
135   - log.debug('type is {0:04X}, instance {1:04X}, version {2:04X}, len {3}'
136   - .format(obj.rec_type, obj.rec_instance, obj.rec_ver,
137   - obj.rec_len))
  136 + #log.debug('type is {0:04X}, instance {1:04X}, version {2:04X}, len {3}'
  137 + # .format(obj.rec_type, obj.rec_instance, obj.rec_ver,
  138 + # obj.rec_len))
138 139 return obj
139 140  
140 141 @classmethod
... ... @@ -1057,8 +1058,8 @@ class PptParser(object):
1057 1058 # ['Current User'],
1058 1059 # ['PowerPoint Document']]
1059 1060 root_streams = self.ole.listdir()
1060   - for stream in root_streams:
1061   - log.debug('found root stream {!r}'.format(stream))
  1061 + #for stream in root_streams:
  1062 + # log.debug('found root stream {!r}'.format(stream))
1062 1063 if any(len(stream) != 1 for stream in root_streams):
1063 1064 self._fail('root', 'listdir', root_streams, 'len = 1')
1064 1065 root_streams = [stream[0].lower() for stream in root_streams]
... ... @@ -1310,7 +1311,7 @@ class PptParser(object):
1310 1311 buf = stream.read(BUF_SIZE)
1311 1312 idx = buf.find(pattern)
1312 1313 while idx != -1:
1313   - log.info('found pattern at index {}'.format(start_pos+idx))
  1314 + log.debug('found pattern at index {}'.format(start_pos+idx))
1314 1315 candidates.append(start_pos+idx)
1315 1316 idx = buf.find(pattern, idx+1)
1316 1317  
... ... @@ -1335,6 +1336,8 @@ class PptParser(object):
1335 1336 .. seealso:: search_vba_storage
1336 1337 """
1337 1338  
  1339 + logging.debug('looking for VBA info containers')
  1340 +
1338 1341 pattern = VBAInfoContainer.generate_pattern(
1339 1342 rec_len=VBAInfoContainer.RECORD_LENGTH) \
1340 1343 + VBAInfoAtom.generate_pattern(
... ... @@ -1352,7 +1355,7 @@ class PptParser(object):
1352 1355 for idx in candidates:
1353 1356 # assume that in stream at idx there is a VBAInfoContainer
1354 1357 stream.seek(idx)
1355   - log.info('extracting at idx {}'.format(idx))
  1358 + log.debug('extracting at idx {}'.format(idx))
1356 1359 try:
1357 1360 container = VBAInfoContainer.extract_from(stream)
1358 1361 except Exception:
... ... @@ -1363,9 +1366,9 @@ class PptParser(object):
1363 1366 if errs:
1364 1367 log.warning('check_validity found {} issues'.format(len(errs)))
1365 1368 else:
1366   - log.info('container is ok')
  1369 + log.debug('container is ok')
1367 1370 atom = container.vba_info_atom
1368   - log.info('persist id ref is {}, has_macros {}, version {}'
  1371 + log.debug('persist id ref is {}, has_macros {}, version {}'
1369 1372 .format(atom.persist_id_ref, atom.f_has_macros,
1370 1373 atom.version))
1371 1374 containers.append(container)
... ... @@ -1396,6 +1399,7 @@ class PptParser(object):
1396 1399 .. seealso:: :py:meth:`search_vba_info`
1397 1400 """
1398 1401  
  1402 + logging.debug('looking for VBA storage objects')
1399 1403 stream = None
1400 1404 try:
1401 1405 log.debug('opening stream')
... ... @@ -1415,7 +1419,7 @@ class PptParser(object):
1415 1419 for idx in candidates:
1416 1420 # assume a ExternalObjectStorage in stream at idx
1417 1421 stream.seek(idx)
1418   - log.info('extracting at idx {}'.format(idx))
  1422 + log.debug('extracting at idx {}'.format(idx))
1419 1423 try:
1420 1424 storage = obj_type.extract_from(stream)
1421 1425 except Exception:
... ... @@ -1427,11 +1431,11 @@ class PptParser(object):
1427 1431 log.warning('check_validity found {} issues'
1428 1432 .format(len(errs)))
1429 1433 else:
1430   - log.info('storage is ok; compressed={}, size={}, '
1431   - 'size_decomp={}'
1432   - .format(storage.is_compressed,
1433   - storage.rec_head.rec_len,
1434   - storage.uncompressed_size))
  1434 + log.debug('storage is ok; compressed={}, size={}, '
  1435 + 'size_decomp={}'
  1436 + .format(storage.is_compressed,
  1437 + storage.rec_head.rec_len,
  1438 + storage.uncompressed_size))
1435 1439 storages.append(storage)
1436 1440 for err in errs:
1437 1441 log.warning('check_validity({}): {}'
... ... @@ -1461,8 +1465,8 @@ class PptParser(object):
1461 1465 stream.seek(storage.data_offset, os.SEEK_SET)
1462 1466 decomp, n_read, err = \
1463 1467 iterative_decompress(stream, storage.data_size)
1464   - log.info('decompressed {} to {} bytes, err is {}'
1465   - .format(n_read, len(decomp), err))
  1468 + log.debug('decompressed {} to {} bytes; found err: {}'
  1469 + .format(n_read, len(decomp), err))
1466 1470 if err and self.fast_fail:
1467 1471 raise err
1468 1472 # otherwise try to continue with partial data
... ...