Commit 1360ca04cb69331aee592582352245ffc2414160

Authored by Christian Herdtweck
Committed by Philippe Lagadec
1 parent 1f43eab2

xls_parser: fix finding streams, also find orphans

Showing 1 changed file with 38 additions and 18 deletions
oletools/xls_parser.py
... ... @@ -50,6 +50,7 @@ __version__ = '0.1'
50 50 #--- IMPORTS ------------------------------------------------------------------
51 51  
52 52 import sys
  53 +import os.path
53 54  
54 55 # little hack to allow absolute imports even if oletools is not installed.
55 56 # Copied from olevba.py
... ... @@ -61,36 +62,55 @@ if not _parent_dir in sys.path:
61 62 from oletools.thirdparty import olefile
62 63  
63 64  
# Printable names for the directory-entry type codes exported by olefile;
# used when reporting entries that are not streams.
entry_type2str = {
    olefile.STGTY_EMPTY: 'empty',
    olefile.STGTY_STORAGE: 'storage',
    olefile.STGTY_STREAM: 'stream',
    olefile.STGTY_LOCKBYTES: 'lock-bytes',
    olefile.STGTY_PROPERTY: 'property',
    olefile.STGTY_ROOT: 'root',
}
  73 +
class XlsFile(olefile.OleFileIO):
    """ specialization of an OLE compound file """

    def get_streams(self):
        """ find all streams, including orphans

        Iterates over every slot of `self.direntries` rather than walking the
        storage tree, so entries that are not part of the tree are inspected
        as well. One status line is printed per directory entry.

        Yields one `XlsStream` for each entry whose type is
        `olefile.STGTY_STREAM`; it wraps the result of `self._open` called
        with the entry's start sector and size.
        """
        print('Finding streams in ole file')

        for sid, direntry in enumerate(self.direntries):
            is_orphan = direntry is None
            if is_orphan:
                # this direntry is not part of the tree: either unused or an
                # orphan
                direntry = self._load_direntry(sid)
            is_stream = direntry.entry_type == olefile.STGTY_STREAM

            if is_stream:
                description = 'is stream of size {}'.format(direntry.size)
            else:
                # Fall back to a generic label instead of raising KeyError
                # when the type code is missing from entry_type2str.
                type_name = entry_type2str.get(
                    direntry.entry_type,
                    'unknown type {}'.format(direntry.entry_type))
                description = 'no stream ({})'.format(type_name)

            label = '[orphan]' if is_orphan else direntry.name
            print('direntry {:2d} {}: {}'.format(sid, label, description))

            if is_stream:
                yield XlsStream(self._open(direntry.isectStart, direntry.size))
84 95  
85 96  
class XlsStream(object):
    """ specialization of an OLE (sub-)stream """

    def __init__(self, stream):
        """ wrap the given stream object

        :param stream: object to wrap; stored unchanged as `self.stream`
        """
        self.stream = stream
89 102  
90 103  
def test(filename):
    """ parse given file and print rough structure

    :param filename: value handed to the `XlsFile` constructor

    If constructing the `XlsFile` raises, the file name and the error are
    printed and the function returns without doing anything else.
    """
    try:
        xls = XlsFile(filename)
    except Exception as exc:
        print('{}: {}'.format(filename, exc))
        return

    try:
        # get_streams() prints its findings while being iterated, so
        # exhausting the generator is all that is needed here
        for _ in xls.get_streams():
            pass
    finally:
        # release the underlying file even if iteration fails
        xls.close()
94 114  
95 115 if __name__ == '__main__':
96 116 """ parse all given file names and print rough structure """
... ...