Commit 1360ca04cb69331aee592582352245ffc2414160
Committed by Philippe Lagadec
1 parent 1f43eab2
xls_parser: fix finding streams, also find orphans
Showing 1 changed file with 38 additions and 18 deletions
oletools/xls_parser.py
| ... | ... | @@ -50,6 +50,7 @@ __version__ = '0.1' |
| 50 | 50 | #--- IMPORTS ------------------------------------------------------------------ |
| 51 | 51 | |
| 52 | 52 | import sys |
| 53 | +import os.path | |
| 53 | 54 | |
| 54 | 55 | # little hack to allow absolute imports even if oletools is not installed. |
| 55 | 56 | # Copied from olevba.py |
| ... | ... | @@ -61,36 +62,55 @@ if not _parent_dir in sys.path: |
| 61 | 62 | from oletools.thirdparty import olefile |
| 62 | 63 | |
| 63 | 64 | |
| 65 | +entry_type2str = { | |
| 66 | + olefile.STGTY_EMPTY: 'empty', | |
| 67 | + olefile.STGTY_STORAGE: 'storage', | |
| 68 | + olefile.STGTY_STREAM: 'stream', | |
| 69 | + olefile.STGTY_LOCKBYTES: 'lock-bytes', | |
| 70 | + olefile.STGTY_PROPERTY: 'property', | |
| 71 | + olefile.STGTY_ROOT: 'root' | |
| 72 | +} | |
| 73 | + | |
| 64 | 74 | class XlsFile(olefile.OleFileIO): |
| 65 | 75 | """ specialization of an OLE compound file """ |
| 66 | 76 | |
| 67 | - def get_streams_recursive(self, storage=None): | |
| 68 | - """ find all streams in all storages, depth-first """ | |
| 69 | - if storage is None: | |
| 70 | - storage = self | |
| 77 | + def get_streams(self): | |
| 78 | + """ find all streams, including orphans """ | |
| 71 | 79 | print('Finding streams in ole file') |
| 72 | - for st in storage.listdir(streams=True, storages=True): | |
| 73 | - st_type = self.get_type(st) | |
| 74 | - if st_type == olefile.STGTY_STREAM: # a stream --> yield | |
| 75 | - print('Checking stream {0}'.format(st)) | |
| 76 | - yield st | |
| 77 | - elif st_type == olefile.STGTY_STORAGE: # a storage --> recurse | |
| 78 | - print('Recurse into storage {0}'.format(st)) | |
| 79 | - for entry in self.get_streams_recursive(st): | |
| 80 | - yield entry | |
| 81 | - else: | |
| 82 | - raise ValueError('unexpected type {0} for entry {1}' | |
| 83 | - .format(st_type, st)) | |
| 80 | + | |
| 81 | + for sid, direntry in enumerate(self.direntries): | |
| 82 | + is_orphan = direntry is None | |
| 83 | + if is_orphan: | |
| 84 | + # this direntry is not part of the tree: either unused or an orphan | |
| 85 | + direntry = self._load_direntry(sid) | |
| 86 | + is_stream = direntry.entry_type == olefile.STGTY_STREAM | |
| 87 | + print('direntry {:2d} {}: {}' | |
| 88 | + .format(sid, '[orphan]' if is_orphan else direntry.name, | |
| 89 | + 'is stream of size {}'.format(direntry.size) | |
| 90 | + if is_stream else | |
| 91 | + 'no stream ({})' | |
| 92 | + .format(entry_type2str[direntry.entry_type]))) | |
| 93 | + if is_stream: | |
| 94 | + yield XlsStream(self._open(direntry.isectStart, direntry.size)) | |
| 84 | 95 | |
| 85 | 96 | |
| 86 | 97 | class XlsStream: |
| 87 | 98 | """ specialization of an OLE (sub-)stream """ |
| 88 | - pass | |
| 99 | + | |
| 100 | + def __init__(self, stream): | |
| 101 | + self.stream = stream | |
| 89 | 102 | |
| 90 | 103 | |
| 91 | 104 | def test(filename): |
| 92 | 105 | """ parse given file and print rough structure """ |
| 93 | - pass | |
| 106 | + try: | |
| 107 | + xls = XlsFile(filename) | |
| 108 | + except Exception as exc: | |
| 109 | + print('{}: {}'.format(filename, exc)) | |
| 110 | + return | |
| 111 | + | |
| 112 | + for stream in xls.get_streams(): | |
| 113 | + pass | |
| 94 | 114 | |
| 95 | 115 | if __name__ == '__main__': |
| 96 | 116 | """ parse all given file names and print rough structure """ |
| ... | ... |