Commit a7d1050e9129bb0d9a3019eecc75553e63a50b9a

Authored by Christian Herdtweck
1 parent 1ee956aa

oleobj: replace direct `logging` module calls with the module-level `log` logger, and use lazy %-style argument formatting where possible

Also remove 1 exception from output and add a comment
Showing 1 changed file with 31 additions and 29 deletions
oletools/oleobj.py
@@ -275,8 +275,8 @@ def guess_encoding(data):
275 return result 275 return result
276 except UnicodeError: 276 except UnicodeError:
277 pass 277 pass
278 - logging.warning('failed to guess encoding for string, falling back to '  
279 - 'ascii with replace') 278 + log.warning('failed to guess encoding for string, falling back to '
  279 + 'ascii with replace')
280 return data.decode('ascii', errors='replace') 280 return data.decode('ascii', errors='replace')
281 281
282 282
@@ -387,7 +387,7 @@ class OleNativeStream(object):
387 # TODO: there can be extra data, no idea what it is for 387 # TODO: there can be extra data, no idea what it is for
388 # TODO: SLACK DATA 388 # TODO: SLACK DATA
389 except (IOError, struct.error): # no data to read actual_size 389 except (IOError, struct.error): # no data to read actual_size
390 - logging.debug('data is not embedded but only a link') 390 + log.debug('data is not embedded but only a link')
391 self.is_link = True 391 self.is_link = True
392 self.actual_size = 0 392 self.actual_size = 0
393 self.data = None 393 self.data = None
@@ -441,20 +441,20 @@ class OleObject(object):
441 index = 0 441 index = 0
442 self.ole_version, index = read_uint32(data, index) 442 self.ole_version, index = read_uint32(data, index)
443 self.format_id, index = read_uint32(data, index) 443 self.format_id, index = read_uint32(data, index)
444 - log.debug('OLE version=%08X - Format ID=%08X'  
445 - % (self.ole_version, self.format_id)) 444 + log.debug('OLE version=%08X - Format ID=%08X',
  445 + self.ole_version, self.format_id)
446 assert self.format_id in (self.TYPE_EMBEDDED, self.TYPE_LINKED) 446 assert self.format_id in (self.TYPE_EMBEDDED, self.TYPE_LINKED)
447 self.class_name, index = read_length_prefixed_string(data, index) 447 self.class_name, index = read_length_prefixed_string(data, index)
448 self.topic_name, index = read_length_prefixed_string(data, index) 448 self.topic_name, index = read_length_prefixed_string(data, index)
449 self.item_name, index = read_length_prefixed_string(data, index) 449 self.item_name, index = read_length_prefixed_string(data, index)
450 - log.debug('Class name=%r - Topic name=%r - Item name=%r'  
451 - % (self.class_name, self.topic_name, self.item_name)) 450 + log.debug('Class name=%r - Topic name=%r - Item name=%r',
  451 + self.class_name, self.topic_name, self.item_name)
452 if self.format_id == self.TYPE_EMBEDDED: 452 if self.format_id == self.TYPE_EMBEDDED:
453 # Embedded object: see MS-OLEDS 2.2.5 EmbeddedObject 453 # Embedded object: see MS-OLEDS 2.2.5 EmbeddedObject
454 # assert self.topic_name != '' and self.item_name != '' 454 # assert self.topic_name != '' and self.item_name != ''
455 self.data_size, index = read_uint32(data, index) 455 self.data_size, index = read_uint32(data, index)
456 - log.debug('Declared data size=%d - remaining size=%d'  
457 - % (self.data_size, len(data)-index)) 456 + log.debug('Declared data size=%d - remaining size=%d',
  457 + self.data_size, len(data)-index)
458 # TODO: handle incorrect size to avoid exception 458 # TODO: handle incorrect size to avoid exception
459 self.data = data[index:index+self.data_size] 459 self.data = data[index:index+self.data_size]
460 assert len(self.data) == self.data_size 460 assert len(self.data) == self.data_size
@@ -498,9 +498,9 @@ def find_ole_in_ppt(filename):
498 ole = record.get_data_as_olefile() 498 ole = record.get_data_as_olefile()
499 yield ole 499 yield ole
500 except IOError: 500 except IOError:
501 - logging.warning('Error reading data from {0} stream or '  
502 - 'interpreting it as OLE object'  
503 - .format(stream.name), exc_info=True) 501 + log.warning('Error reading data from {0} stream or '
  502 + 'interpreting it as OLE object'
  503 + .format(stream.name), exc_info=True)
504 finally: 504 finally:
505 if ole is not None: 505 if ole is not None:
506 ole.close() 506 ole.close()
@@ -515,22 +515,22 @@ def find_ole(filename, data):
515 try: 515 try:
516 if data is not None: 516 if data is not None:
517 # assume data is a complete OLE file 517 # assume data is a complete OLE file
518 - logging.info('working on raw OLE data (filename: {0})'  
519 - .format(filename)) 518 + log.info('working on raw OLE data (filename: {0})'
  519 + .format(filename))
520 yield olefile.OleFileIO(data) 520 yield olefile.OleFileIO(data)
521 elif olefile.isOleFile(filename): 521 elif olefile.isOleFile(filename):
522 if is_ppt(filename): 522 if is_ppt(filename):
523 - logging.info('is ppt file: ' + filename) 523 + log.info('is ppt file: ' + filename)
524 for ole in find_ole_in_ppt(filename): 524 for ole in find_ole_in_ppt(filename):
525 yield ole 525 yield ole
526 ole.close() 526 ole.close()
527 else: 527 else:
528 - logging.info('is ole file: ' + filename) 528 + log.info('is ole file: ' + filename)
529 ole = olefile.OleFileIO(filename) 529 ole = olefile.OleFileIO(filename)
530 yield ole 530 yield ole
531 ole.close() 531 ole.close()
532 elif is_zipfile(filename): 532 elif is_zipfile(filename):
533 - logging.info('is zip file: ' + filename) 533 + log.info('is zip file: ' + filename)
534 zipper = ZipFile(filename, 'r') 534 zipper = ZipFile(filename, 'r')
535 for subfile in zipper.namelist(): 535 for subfile in zipper.namelist():
536 head = b'' 536 head = b''
@@ -538,24 +538,24 @@ def find_ole(filename, data):
538 with zipper.open(subfile) as file_handle: 538 with zipper.open(subfile) as file_handle:
539 head = file_handle.read(len(olefile.MAGIC)) 539 head = file_handle.read(len(olefile.MAGIC))
540 except RuntimeError: 540 except RuntimeError:
541 - logging.error('zip is encrypted: ' + filename) 541 + log.error('zip is encrypted: ' + filename)
542 yield None 542 yield None
543 continue 543 continue
544 544
545 if head == olefile.MAGIC: 545 if head == olefile.MAGIC:
546 - logging.info(' unzipping ole: ' + subfile) 546 + log.info(' unzipping ole: ' + subfile)
547 with zipper.open(subfile) as file_handle: 547 with zipper.open(subfile) as file_handle:
548 ole = olefile.OleFileIO(file_handle) 548 ole = olefile.OleFileIO(file_handle)
549 yield ole 549 yield ole
550 ole.close() 550 ole.close()
551 else: 551 else:
552 - logging.debug('unzip skip: ' + subfile) 552 + log.debug('unzip skip: ' + subfile)
553 else: 553 else:
554 - logging.warning('open failed: ' + filename) 554 + log.warning('open failed: ' + filename)
555 yield None # --> leads to non-0 return code 555 yield None # --> leads to non-0 return code
556 except Exception: 556 except Exception:
557 - logging.error('Caught exception opening {0}'.format(filename),  
558 - exc_info=True) 557 + log.error('Caught exception opening {0}'.format(filename),
  558 + exc_info=True)
559 yield None # --> leads to non-0 return code but try next file first 559 yield None # --> leads to non-0 return code but try next file first
560 560
561 561
@@ -571,7 +571,7 @@ def process_file(filename, data, output_dir=None):
571 """ 571 """
572 if output_dir: 572 if output_dir:
573 if not os.path.isdir(output_dir): 573 if not os.path.isdir(output_dir):
574 - log.info('creating output directory %s' % output_dir) 574 + log.info('creating output directory %s', output_dir)
575 os.mkdir(output_dir) 575 os.mkdir(output_dir)
576 576
577 fname_prefix = os.path.join(output_dir, 577 fname_prefix = os.path.join(output_dir,
@@ -592,6 +592,8 @@ def process_file(filename, data, output_dir=None):
592 did_dump = False 592 did_dump = False
593 593
594 # look for ole files inside file (e.g. unzip docx) 594 # look for ole files inside file (e.g. unzip docx)
  595 + # have to finish work on every ole stream inside iteration, since handles
  596 + # are closed in find_ole
595 for ole in find_ole(filename, data): 597 for ole in find_ole(filename, data):
596 if ole is None: # no ole file found 598 if ole is None: # no ole file found
597 continue 599 continue
@@ -599,7 +601,7 @@ def process_file(filename, data, output_dir=None):
599 for path_parts in ole.listdir(): 601 for path_parts in ole.listdir():
600 if path_parts[-1] == '\x01Ole10Native': 602 if path_parts[-1] == '\x01Ole10Native':
601 stream_path = '/'.join(path_parts) 603 stream_path = '/'.join(path_parts)
602 - log.debug('Checking stream %r' % stream_path) 604 + log.debug('Checking stream %r', stream_path)
603 stream = None 605 stream = None
604 try: 606 try:
605 stream = ole.openstream(path_parts) 607 stream = ole.openstream(path_parts)
@@ -608,8 +610,8 @@ def process_file(filename, data, output_dir=None):
608 print('Parsing OLE Package') 610 print('Parsing OLE Package')
609 opkg = OleNativeStream(stream) 611 opkg = OleNativeStream(stream)
610 # leave stream open until dumping is finished 612 # leave stream open until dumping is finished
611 - except Exception as exc:  
612 - log.warning('*** Not an OLE 1.0 Object ({0})'.format(exc)) 613 + except Exception:
  614 + log.warning('*** Not an OLE 1.0 Object')
613 err_stream = True 615 err_stream = True
614 if stream is not None: 616 if stream is not None:
615 stream.close() 617 stream.close()
@@ -640,8 +642,8 @@ def process_file(filename, data, output_dir=None):
640 writer.write(data) 642 writer.write(data)
641 n_dumped += len(data) 643 n_dumped += len(data)
642 if len(data) != next_size: 644 if len(data) != next_size:
643 - logging.warning('Wanted to read {0}, got {1}'  
644 - .format(next_size, len(data))) 645 + log.warning('Wanted to read {0}, got {1}'
  646 + .format(next_size, len(data)))
645 break 647 break
646 next_size = min(DUMP_CHUNK_SIZE, 648 next_size = min(DUMP_CHUNK_SIZE,
647 opkg.actual_size - n_dumped) 649 opkg.actual_size - n_dumped)