Commit 1f3a02a761af66398484a1f9f08139f0144c28e1

Authored by Christian Herdtweck
Committed by Philippe Lagadec
1 parent 8a288366

msodde: improve finding streams in doc

Showing 1 changed file with 22 additions and 34 deletions
oletools/msodde.py
@@ -428,7 +428,9 @@ def process_ole_stream(stream):
428 have_sep = True 428 have_sep = True
429 elif char == OLE_FIELD_END: 429 elif char == OLE_FIELD_END:
430 # have complete field now, process it 430 # have complete field now, process it
431 - result_parts.append(process_ole_field(field_contents)) 431 + new_result = process_ole_field(field_contents)
  432 + if new_result:
  433 + result_parts.append(new_result)
432 434
433 # re-set variables for next field 435 # re-set variables for next field
434 have_start = False 436 have_start = False
@@ -466,37 +468,6 @@ def process_ole_stream(stream):
466 return result_parts 468 return result_parts
467 469
468 470
469 -def process_ole_storage(ole):  
470 - """ process a "directory" inside an ole file; recursive """  
471 - results = []  
472 - for st in ole.listdir(streams=True, storages=True):  
473 - st_type = ole.get_type(st)  
474 - if st_type == olefile.STGTY_STREAM: # a stream  
475 - stream = None  
476 - links = []  
477 - try:  
478 - stream = ole.openstream(st)  
479 - log.debug('Checking stream {0}'.format(st))  
480 - links = process_ole_stream(stream)  
481 - except Exception:  
482 - raise  
483 - finally:  
484 - if stream:  
485 - stream.close()  
486 - if links:  
487 - results.extend(links)  
488 - elif st_type == olefile.STGTY_STORAGE: # a storage  
489 - log.debug('Checking storage {0}'.format(st))  
490 - links = process_ole_storage(st)  
491 - if links:  
492 - results.extend(links)  
493 - else:  
494 - log.info('unexpected type {0} for entry {1}. Ignore it'  
495 - .format(st_type, st))  
496 - continue  
497 - return results  
498 -  
499 -  
500 def process_ole(filepath): 471 def process_ole(filepath):
501 """ 472 """
502 find dde links in ole file 473 find dde links in ole file
@@ -507,10 +478,27 @@ def process_ole(filepath):
507 """ 478 """
508 log.debug('process_ole') 479 log.debug('process_ole')
509 ole = olefile.OleFileIO(filepath, path_encoding=None) 480 ole = olefile.OleFileIO(filepath, path_encoding=None)
510 - text_parts = process_ole_storage(ole) 481 +
  482 + links = []
  483 + for sid, direntry in enumerate(ole.direntries):
  484 + is_orphan = direntry is None
  485 + if is_orphan:
  486 + # this direntry is not part of the tree --> unused or orphan
  487 + direntry = ole._load_direntry(sid)
  488 + is_stream = direntry.entry_type == olefile.STGTY_STREAM
  489 + log.debug('direntry {:2d} {}: {}'
  490 + .format(sid, '[orphan]' if is_orphan else direntry.name,
  491 + 'is stream of size {}'.format(direntry.size)
  492 + if is_stream else
  493 + 'no stream ({})'
  494 + .format(direntry.entry_type)))
  495 + if is_stream:
  496 + new_parts = process_ole_stream(
  497 + ole._open(direntry.isectStart, direntry.size))
  498 + links.extend(new_parts)
511 499
512 # mimic behaviour of process_docx: combine links to single text string 500 # mimic behaviour of process_docx: combine links to single text string
513 - return u'\n'.join(text_parts) 501 + return u'\n'.join(links)
514 502
515 503
516 def process_xls(filepath): 504 def process_xls(filepath):