Commit 1f3a02a761af66398484a1f9f08139f0144c28e1

Authored by Christian Herdtweck
Committed by Philippe Lagadec
1 parent 8a288366

msodde: improve finding streams in doc

Showing 1 changed file with 22 additions and 34 deletions
oletools/msodde.py
... ... @@ -428,7 +428,9 @@ def process_ole_stream(stream):
428 428 have_sep = True
429 429 elif char == OLE_FIELD_END:
430 430 # have complete field now, process it
431   - result_parts.append(process_ole_field(field_contents))
  431 + new_result = process_ole_field(field_contents)
  432 + if new_result:
  433 + result_parts.append(new_result)
432 434  
433 435 # re-set variables for next field
434 436 have_start = False
... ... @@ -466,37 +468,6 @@ def process_ole_stream(stream):
466 468 return result_parts
467 469  
468 470  
469   -def process_ole_storage(ole):
470   - """ process a "directory" inside an ole file; recursive """
471   - results = []
472   - for st in ole.listdir(streams=True, storages=True):
473   - st_type = ole.get_type(st)
474   - if st_type == olefile.STGTY_STREAM: # a stream
475   - stream = None
476   - links = []
477   - try:
478   - stream = ole.openstream(st)
479   - log.debug('Checking stream {0}'.format(st))
480   - links = process_ole_stream(stream)
481   - except Exception:
482   - raise
483   - finally:
484   - if stream:
485   - stream.close()
486   - if links:
487   - results.extend(links)
488   - elif st_type == olefile.STGTY_STORAGE: # a storage
489   - log.debug('Checking storage {0}'.format(st))
490   - links = process_ole_storage(st)
491   - if links:
492   - results.extend(links)
493   - else:
494   - log.info('unexpected type {0} for entry {1}. Ignore it'
495   - .format(st_type, st))
496   - continue
497   - return results
498   -
499   -
500 471 def process_ole(filepath):
501 472 """
502 473 find dde links in ole file
... ... @@ -507,10 +478,27 @@ def process_ole(filepath):
507 478 """
508 479 log.debug('process_ole')
509 480 ole = olefile.OleFileIO(filepath, path_encoding=None)
510   - text_parts = process_ole_storage(ole)
  481 +
  482 + links = []
  483 + for sid, direntry in enumerate(ole.direntries):
  484 + is_orphan = direntry is None
  485 + if is_orphan:
  486 + # this direntry is not part of the tree --> unused or orphan
  487 + direntry = ole._load_direntry(sid)
  488 + is_stream = direntry.entry_type == olefile.STGTY_STREAM
  489 + log.debug('direntry {:2d} {}: {}'
  490 + .format(sid, '[orphan]' if is_orphan else direntry.name,
  491 + 'is stream of size {}'.format(direntry.size)
  492 + if is_stream else
  493 + 'no stream ({})'
  494 + .format(direntry.entry_type)))
  495 + if is_stream:
  496 + new_parts = process_ole_stream(
  497 + ole._open(direntry.isectStart, direntry.size))
  498 + links.extend(new_parts)
511 499  
512 500 # mimic behaviour of process_docx: combine links to single text string
513   - return u'\n'.join(text_parts)
  501 + return u'\n'.join(links)
514 502  
515 503  
516 504 def process_xls(filepath):
... ...