Commit 1f3a02a761af66398484a1f9f08139f0144c28e1
Committed by
Philippe Lagadec
1 parent
8a288366
msodde: improve finding streams in doc
Showing
1 changed file
with
22 additions
and
34 deletions
oletools/msodde.py
| @@ -428,7 +428,9 @@ def process_ole_stream(stream): | @@ -428,7 +428,9 @@ def process_ole_stream(stream): | ||
| 428 | have_sep = True | 428 | have_sep = True |
| 429 | elif char == OLE_FIELD_END: | 429 | elif char == OLE_FIELD_END: |
| 430 | # have complete field now, process it | 430 | # have complete field now, process it |
| 431 | - result_parts.append(process_ole_field(field_contents)) | 431 | + new_result = process_ole_field(field_contents) |
| 432 | + if new_result: | ||
| 433 | + result_parts.append(new_result) | ||
| 432 | 434 | ||
| 433 | # re-set variables for next field | 435 | # re-set variables for next field |
| 434 | have_start = False | 436 | have_start = False |
| @@ -466,37 +468,6 @@ def process_ole_stream(stream): | @@ -466,37 +468,6 @@ def process_ole_stream(stream): | ||
| 466 | return result_parts | 468 | return result_parts |
| 467 | 469 | ||
| 468 | 470 | ||
| 469 | -def process_ole_storage(ole): | ||
| 470 | - """ process a "directory" inside an ole file; recursive """ | ||
| 471 | - results = [] | ||
| 472 | - for st in ole.listdir(streams=True, storages=True): | ||
| 473 | - st_type = ole.get_type(st) | ||
| 474 | - if st_type == olefile.STGTY_STREAM: # a stream | ||
| 475 | - stream = None | ||
| 476 | - links = [] | ||
| 477 | - try: | ||
| 478 | - stream = ole.openstream(st) | ||
| 479 | - log.debug('Checking stream {0}'.format(st)) | ||
| 480 | - links = process_ole_stream(stream) | ||
| 481 | - except Exception: | ||
| 482 | - raise | ||
| 483 | - finally: | ||
| 484 | - if stream: | ||
| 485 | - stream.close() | ||
| 486 | - if links: | ||
| 487 | - results.extend(links) | ||
| 488 | - elif st_type == olefile.STGTY_STORAGE: # a storage | ||
| 489 | - log.debug('Checking storage {0}'.format(st)) | ||
| 490 | - links = process_ole_storage(st) | ||
| 491 | - if links: | ||
| 492 | - results.extend(links) | ||
| 493 | - else: | ||
| 494 | - log.info('unexpected type {0} for entry {1}. Ignore it' | ||
| 495 | - .format(st_type, st)) | ||
| 496 | - continue | ||
| 497 | - return results | ||
| 498 | - | ||
| 499 | - | ||
| 500 | def process_ole(filepath): | 471 | def process_ole(filepath): |
| 501 | """ | 472 | """ |
| 502 | find dde links in ole file | 473 | find dde links in ole file |
| @@ -507,10 +478,27 @@ def process_ole(filepath): | @@ -507,10 +478,27 @@ def process_ole(filepath): | ||
| 507 | """ | 478 | """ |
| 508 | log.debug('process_ole') | 479 | log.debug('process_ole') |
| 509 | ole = olefile.OleFileIO(filepath, path_encoding=None) | 480 | ole = olefile.OleFileIO(filepath, path_encoding=None) |
| 510 | - text_parts = process_ole_storage(ole) | 481 | + |
| 482 | + links = [] | ||
| 483 | + for sid, direntry in enumerate(ole.direntries): | ||
| 484 | + is_orphan = direntry is None | ||
| 485 | + if is_orphan: | ||
| 486 | + # this direntry is not part of the tree --> unused or orphan | ||
| 487 | + direntry = ole._load_direntry(sid) | ||
| 488 | + is_stream = direntry.entry_type == olefile.STGTY_STREAM | ||
| 489 | + log.debug('direntry {:2d} {}: {}' | ||
| 490 | + .format(sid, '[orphan]' if is_orphan else direntry.name, | ||
| 491 | + 'is stream of size {}'.format(direntry.size) | ||
| 492 | + if is_stream else | ||
| 493 | + 'no stream ({})' | ||
| 494 | + .format(direntry.entry_type))) | ||
| 495 | + if is_stream: | ||
| 496 | + new_parts = process_ole_stream( | ||
| 497 | + ole._open(direntry.isectStart, direntry.size)) | ||
| 498 | + links.extend(new_parts) | ||
| 511 | 499 | ||
| 512 | # mimic behaviour of process_docx: combine links to single text string | 500 | # mimic behaviour of process_docx: combine links to single text string |
| 513 | - return u'\n'.join(text_parts) | 501 | + return u'\n'.join(links) |
| 514 | 502 | ||
| 515 | 503 | ||
| 516 | def process_xls(filepath): | 504 | def process_xls(filepath): |