Commit 1f3a02a761af66398484a1f9f08139f0144c28e1
Committed by Philippe Lagadec
1 parent: 8a288366
msodde: improve finding streams in doc
Showing 1 changed file with 22 additions and 34 deletions
oletools/msodde.py
| ... | ... | @@ -428,7 +428,9 @@ def process_ole_stream(stream): |
| 428 | 428 | have_sep = True |
| 429 | 429 | elif char == OLE_FIELD_END: |
| 430 | 430 | # have complete field now, process it |
| 431 | - result_parts.append(process_ole_field(field_contents)) | |
| 431 | + new_result = process_ole_field(field_contents) | |
| 432 | + if new_result: | |
| 433 | + result_parts.append(new_result) | |
| 432 | 434 | |
| 433 | 435 | # re-set variables for next field |
| 434 | 436 | have_start = False |
| ... | ... | @@ -466,37 +468,6 @@ def process_ole_stream(stream): |
| 466 | 468 | return result_parts |
| 467 | 469 | |
| 468 | 470 | |
| 469 | -def process_ole_storage(ole): | |
| 470 | - """ process a "directory" inside an ole file; recursive """ | |
| 471 | - results = [] | |
| 472 | - for st in ole.listdir(streams=True, storages=True): | |
| 473 | - st_type = ole.get_type(st) | |
| 474 | - if st_type == olefile.STGTY_STREAM: # a stream | |
| 475 | - stream = None | |
| 476 | - links = [] | |
| 477 | - try: | |
| 478 | - stream = ole.openstream(st) | |
| 479 | - log.debug('Checking stream {0}'.format(st)) | |
| 480 | - links = process_ole_stream(stream) | |
| 481 | - except Exception: | |
| 482 | - raise | |
| 483 | - finally: | |
| 484 | - if stream: | |
| 485 | - stream.close() | |
| 486 | - if links: | |
| 487 | - results.extend(links) | |
| 488 | - elif st_type == olefile.STGTY_STORAGE: # a storage | |
| 489 | - log.debug('Checking storage {0}'.format(st)) | |
| 490 | - links = process_ole_storage(st) | |
| 491 | - if links: | |
| 492 | - results.extend(links) | |
| 493 | - else: | |
| 494 | - log.info('unexpected type {0} for entry {1}. Ignore it' | |
| 495 | - .format(st_type, st)) | |
| 496 | - continue | |
| 497 | - return results | |
| 498 | - | |
| 499 | - | |
| 500 | 471 | def process_ole(filepath): |
| 501 | 472 | """ |
| 502 | 473 | find dde links in ole file |
| ... | ... | @@ -507,10 +478,27 @@ def process_ole(filepath): |
| 507 | 478 | """ |
| 508 | 479 | log.debug('process_ole') |
| 509 | 480 | ole = olefile.OleFileIO(filepath, path_encoding=None) |
| 510 | - text_parts = process_ole_storage(ole) | |
| 481 | + | |
| 482 | + links = [] | |
| 483 | + for sid, direntry in enumerate(ole.direntries): | |
| 484 | + is_orphan = direntry is None | |
| 485 | + if is_orphan: | |
| 486 | + # this direntry is not part of the tree --> unused or orphan | |
| 487 | + direntry = ole._load_direntry(sid) | |
| 488 | + is_stream = direntry.entry_type == olefile.STGTY_STREAM | |
| 489 | + log.debug('direntry {:2d} {}: {}' | |
| 490 | + .format(sid, '[orphan]' if is_orphan else direntry.name, | |
| 491 | + 'is stream of size {}'.format(direntry.size) | |
| 492 | + if is_stream else | |
| 493 | + 'no stream ({})' | |
| 494 | + .format(direntry.entry_type))) | |
| 495 | + if is_stream: | |
| 496 | + new_parts = process_ole_stream( | |
| 497 | + ole._open(direntry.isectStart, direntry.size)) | |
| 498 | + links.extend(new_parts) | |
| 511 | 499 | |
| 512 | 500 | # mimic behaviour of process_docx: combine links to single text string |
| 513 | - return u'\n'.join(text_parts) | |
| 501 | + return u'\n'.join(links) | |
| 514 | 502 | |
| 515 | 503 | |
| 516 | 504 | def process_xls(filepath): | ... | ... |