Commit 1360ca04cb69331aee592582352245ffc2414160
Committed by Philippe Lagadec
1 parent: 1f43eab2
xls_parser: fix finding streams, also find orphans
Showing 1 changed file with 38 additions and 18 deletions
oletools/xls_parser.py
| @@ -50,6 +50,7 @@ __version__ = '0.1' | @@ -50,6 +50,7 @@ __version__ = '0.1' | ||
| 50 | #--- IMPORTS ------------------------------------------------------------------ | 50 | #--- IMPORTS ------------------------------------------------------------------ |
| 51 | 51 | ||
| 52 | import sys | 52 | import sys |
| 53 | +import os.path | ||
| 53 | 54 | ||
| 54 | # little hack to allow absolute imports even if oletools is not installed. | 55 | # little hack to allow absolute imports even if oletools is not installed. |
| 55 | # Copied from olevba.py | 56 | # Copied from olevba.py |
| @@ -61,36 +62,55 @@ if not _parent_dir in sys.path: | @@ -61,36 +62,55 @@ if not _parent_dir in sys.path: | ||
| 61 | from oletools.thirdparty import olefile | 62 | from oletools.thirdparty import olefile |
| 62 | 63 | ||
| 63 | 64 | ||
| 65 | +entry_type2str = { | ||
| 66 | + olefile.STGTY_EMPTY: 'empty', | ||
| 67 | + olefile.STGTY_STORAGE: 'storage', | ||
| 68 | + olefile.STGTY_STREAM: 'stream', | ||
| 69 | + olefile.STGTY_LOCKBYTES: 'lock-bytes', | ||
| 70 | + olefile.STGTY_PROPERTY: 'property', | ||
| 71 | + olefile.STGTY_ROOT: 'root' | ||
| 72 | +} | ||
| 73 | + | ||
| 64 | class XlsFile(olefile.OleFileIO): | 74 | class XlsFile(olefile.OleFileIO): |
| 65 | """ specialization of an OLE compound file """ | 75 | """ specialization of an OLE compound file """ |
| 66 | 76 | ||
| 67 | - def get_streams_recursive(self, storage=None): | ||
| 68 | - """ find all streams in all storages, depth-first """ | ||
| 69 | - if storage is None: | ||
| 70 | - storage = self | 77 | + def get_streams(self): |
| 78 | + """ find all streams, including orphans """ | ||
| 71 | print('Finding streams in ole file') | 79 | print('Finding streams in ole file') |
| 72 | - for st in storage.listdir(streams=True, storages=True): | ||
| 73 | - st_type = self.get_type(st) | ||
| 74 | - if st_type == olefile.STGTY_STREAM: # a stream --> yield | ||
| 75 | - print('Checking stream {0}'.format(st)) | ||
| 76 | - yield st | ||
| 77 | - elif st_type == olefile.STGTY_STORAGE: # a storage --> recurse | ||
| 78 | - print('Recurse into storage {0}'.format(st)) | ||
| 79 | - for entry in self.get_streams_recursive(st): | ||
| 80 | - yield entry | ||
| 81 | - else: | ||
| 82 | - raise ValueError('unexpected type {0} for entry {1}' | ||
| 83 | - .format(st_type, st)) | 80 | + |
| 81 | + for sid, direntry in enumerate(self.direntries): | ||
| 82 | + is_orphan = direntry is None | ||
| 83 | + if is_orphan: | ||
| 84 | + # this direntry is not part of the tree: either unused or an orphan | ||
| 85 | + direntry = self._load_direntry(sid) | ||
| 86 | + is_stream = direntry.entry_type == olefile.STGTY_STREAM | ||
| 87 | + print('direntry {:2d} {}: {}' | ||
| 88 | + .format(sid, '[orphan]' if is_orphan else direntry.name, | ||
| 89 | + 'is stream of size {}'.format(direntry.size) | ||
| 90 | + if is_stream else | ||
| 91 | + 'no stream ({})' | ||
| 92 | + .format(entry_type2str[direntry.entry_type]))) | ||
| 93 | + if is_stream: | ||
| 94 | + yield XlsStream(self._open(direntry.isectStart, direntry.size)) | ||
| 84 | 95 | ||
| 85 | 96 | ||
| 86 | class XlsStream: | 97 | class XlsStream: |
| 87 | """ specialization of an OLE (sub-)stream """ | 98 | """ specialization of an OLE (sub-)stream """ |
| 88 | - pass | 99 | + |
| 100 | + def __init__(self, stream): | ||
| 101 | + self.stream = stream | ||
| 89 | 102 | ||
| 90 | 103 | ||
| 91 | def test(filename): | 104 | def test(filename): |
| 92 | """ parse given file and print rough structure """ | 105 | """ parse given file and print rough structure """ |
| 93 | - pass | 106 | + try: |
| 107 | + xls = XlsFile(filename) | ||
| 108 | + except Exception as exc: | ||
| 109 | + print('{}: {}'.format(filename, exc)) | ||
| 110 | + return | ||
| 111 | + | ||
| 112 | + for stream in xls.get_streams(): | ||
| 113 | + pass | ||
| 94 | 114 | ||
| 95 | if __name__ == '__main__': | 115 | if __name__ == '__main__': |
| 96 | """ parse all given file names and print rough structure """ | 116 | """ parse all given file names and print rough structure """ |