Commit a7d1050e9129bb0d9a3019eecc75553e63a50b9a

Authored by Christian Herdtweck
1 parent 1ee956aa

oleobj: replace direct `logging.*` calls with the module-level `log` logger, and use lazy %-style argument formatting where possible

Also remove one exception's detail text from a warning message and add an explanatory comment
Showing 1 changed file with 31 additions and 29 deletions
oletools/oleobj.py
... ... @@ -275,8 +275,8 @@ def guess_encoding(data):
275 275 return result
276 276 except UnicodeError:
277 277 pass
278   - logging.warning('failed to guess encoding for string, falling back to '
279   - 'ascii with replace')
  278 + log.warning('failed to guess encoding for string, falling back to '
  279 + 'ascii with replace')
280 280 return data.decode('ascii', errors='replace')
281 281  
282 282  
... ... @@ -387,7 +387,7 @@ class OleNativeStream(object):
387 387 # TODO: there can be extra data, no idea what it is for
388 388 # TODO: SLACK DATA
389 389 except (IOError, struct.error): # no data to read actual_size
390   - logging.debug('data is not embedded but only a link')
  390 + log.debug('data is not embedded but only a link')
391 391 self.is_link = True
392 392 self.actual_size = 0
393 393 self.data = None
... ... @@ -441,20 +441,20 @@ class OleObject(object):
441 441 index = 0
442 442 self.ole_version, index = read_uint32(data, index)
443 443 self.format_id, index = read_uint32(data, index)
444   - log.debug('OLE version=%08X - Format ID=%08X'
445   - % (self.ole_version, self.format_id))
  444 + log.debug('OLE version=%08X - Format ID=%08X',
  445 + self.ole_version, self.format_id)
446 446 assert self.format_id in (self.TYPE_EMBEDDED, self.TYPE_LINKED)
447 447 self.class_name, index = read_length_prefixed_string(data, index)
448 448 self.topic_name, index = read_length_prefixed_string(data, index)
449 449 self.item_name, index = read_length_prefixed_string(data, index)
450   - log.debug('Class name=%r - Topic name=%r - Item name=%r'
451   - % (self.class_name, self.topic_name, self.item_name))
  450 + log.debug('Class name=%r - Topic name=%r - Item name=%r',
  451 + self.class_name, self.topic_name, self.item_name)
452 452 if self.format_id == self.TYPE_EMBEDDED:
453 453 # Embedded object: see MS-OLEDS 2.2.5 EmbeddedObject
454 454 # assert self.topic_name != '' and self.item_name != ''
455 455 self.data_size, index = read_uint32(data, index)
456   - log.debug('Declared data size=%d - remaining size=%d'
457   - % (self.data_size, len(data)-index))
  456 + log.debug('Declared data size=%d - remaining size=%d',
  457 + self.data_size, len(data)-index)
458 458 # TODO: handle incorrect size to avoid exception
459 459 self.data = data[index:index+self.data_size]
460 460 assert len(self.data) == self.data_size
... ... @@ -498,9 +498,9 @@ def find_ole_in_ppt(filename):
498 498 ole = record.get_data_as_olefile()
499 499 yield ole
500 500 except IOError:
501   - logging.warning('Error reading data from {0} stream or '
502   - 'interpreting it as OLE object'
503   - .format(stream.name), exc_info=True)
  501 + log.warning('Error reading data from {0} stream or '
  502 + 'interpreting it as OLE object'
  503 + .format(stream.name), exc_info=True)
504 504 finally:
505 505 if ole is not None:
506 506 ole.close()
... ... @@ -515,22 +515,22 @@ def find_ole(filename, data):
515 515 try:
516 516 if data is not None:
517 517 # assume data is a complete OLE file
518   - logging.info('working on raw OLE data (filename: {0})'
519   - .format(filename))
  518 + log.info('working on raw OLE data (filename: {0})'
  519 + .format(filename))
520 520 yield olefile.OleFileIO(data)
521 521 elif olefile.isOleFile(filename):
522 522 if is_ppt(filename):
523   - logging.info('is ppt file: ' + filename)
  523 + log.info('is ppt file: ' + filename)
524 524 for ole in find_ole_in_ppt(filename):
525 525 yield ole
526 526 ole.close()
527 527 else:
528   - logging.info('is ole file: ' + filename)
  528 + log.info('is ole file: ' + filename)
529 529 ole = olefile.OleFileIO(filename)
530 530 yield ole
531 531 ole.close()
532 532 elif is_zipfile(filename):
533   - logging.info('is zip file: ' + filename)
  533 + log.info('is zip file: ' + filename)
534 534 zipper = ZipFile(filename, 'r')
535 535 for subfile in zipper.namelist():
536 536 head = b''
... ... @@ -538,24 +538,24 @@ def find_ole(filename, data):
538 538 with zipper.open(subfile) as file_handle:
539 539 head = file_handle.read(len(olefile.MAGIC))
540 540 except RuntimeError:
541   - logging.error('zip is encrypted: ' + filename)
  541 + log.error('zip is encrypted: ' + filename)
542 542 yield None
543 543 continue
544 544  
545 545 if head == olefile.MAGIC:
546   - logging.info(' unzipping ole: ' + subfile)
  546 + log.info(' unzipping ole: ' + subfile)
547 547 with zipper.open(subfile) as file_handle:
548 548 ole = olefile.OleFileIO(file_handle)
549 549 yield ole
550 550 ole.close()
551 551 else:
552   - logging.debug('unzip skip: ' + subfile)
  552 + log.debug('unzip skip: ' + subfile)
553 553 else:
554   - logging.warning('open failed: ' + filename)
  554 + log.warning('open failed: ' + filename)
555 555 yield None # --> leads to non-0 return code
556 556 except Exception:
557   - logging.error('Caught exception opening {0}'.format(filename),
558   - exc_info=True)
  557 + log.error('Caught exception opening {0}'.format(filename),
  558 + exc_info=True)
559 559 yield None # --> leads to non-0 return code but try next file first
560 560  
561 561  
... ... @@ -571,7 +571,7 @@ def process_file(filename, data, output_dir=None):
571 571 """
572 572 if output_dir:
573 573 if not os.path.isdir(output_dir):
574   - log.info('creating output directory %s' % output_dir)
  574 + log.info('creating output directory %s', output_dir)
575 575 os.mkdir(output_dir)
576 576  
577 577 fname_prefix = os.path.join(output_dir,
... ... @@ -592,6 +592,8 @@ def process_file(filename, data, output_dir=None):
592 592 did_dump = False
593 593  
594 594 # look for ole files inside file (e.g. unzip docx)
  595 + # have to finish work on every ole stream inside iteration, since handles
  596 + # are closed in find_ole
595 597 for ole in find_ole(filename, data):
596 598 if ole is None: # no ole file found
597 599 continue
... ... @@ -599,7 +601,7 @@ def process_file(filename, data, output_dir=None):
599 601 for path_parts in ole.listdir():
600 602 if path_parts[-1] == '\x01Ole10Native':
601 603 stream_path = '/'.join(path_parts)
602   - log.debug('Checking stream %r' % stream_path)
  604 + log.debug('Checking stream %r', stream_path)
603 605 stream = None
604 606 try:
605 607 stream = ole.openstream(path_parts)
... ... @@ -608,8 +610,8 @@ def process_file(filename, data, output_dir=None):
608 610 print('Parsing OLE Package')
609 611 opkg = OleNativeStream(stream)
610 612 # leave stream open until dumping is finished
611   - except Exception as exc:
612   - log.warning('*** Not an OLE 1.0 Object ({0})'.format(exc))
  613 + except Exception:
  614 + log.warning('*** Not an OLE 1.0 Object')
613 615 err_stream = True
614 616 if stream is not None:
615 617 stream.close()
... ... @@ -640,8 +642,8 @@ def process_file(filename, data, output_dir=None):
640 642 writer.write(data)
641 643 n_dumped += len(data)
642 644 if len(data) != next_size:
643   - logging.warning('Wanted to read {0}, got {1}'
644   - .format(next_size, len(data)))
  645 + log.warning('Wanted to read {0}, got {1}'
  646 + .format(next_size, len(data)))
645 647 break
646 648 next_size = min(DUMP_CHUNK_SIZE,
647 649 opkg.actual_size - n_dumped)
... ...