Commit 1360ca04cb69331aee592582352245ffc2414160

Authored by Christian Herdtweck
Committed by Philippe Lagadec
1 parent 1f43eab2

xls_parser: fix finding streams, also find orphans

Showing 1 changed file with 38 additions and 18 deletions
oletools/xls_parser.py
@@ -50,6 +50,7 @@ __version__ = '0.1' @@ -50,6 +50,7 @@ __version__ = '0.1'
50 #--- IMPORTS ------------------------------------------------------------------ 50 #--- IMPORTS ------------------------------------------------------------------
51 51
52 import sys 52 import sys
  53 +import os.path
53 54
54 # little hack to allow absolute imports even if oletools is not installed. 55 # little hack to allow absolute imports even if oletools is not installed.
55 # Copied from olevba.py 56 # Copied from olevba.py
@@ -61,36 +62,55 @@ if not _parent_dir in sys.path: @@ -61,36 +62,55 @@ if not _parent_dir in sys.path:
61 from oletools.thirdparty import olefile 62 from oletools.thirdparty import olefile
62 63
63 64
  65 +entry_type2str = {
  66 + olefile.STGTY_EMPTY: 'empty',
  67 + olefile.STGTY_STORAGE: 'storage',
  68 + olefile.STGTY_STREAM: 'stream',
  69 + olefile.STGTY_LOCKBYTES: 'lock-bytes',
  70 + olefile.STGTY_PROPERTY: 'property',
  71 + olefile.STGTY_ROOT: 'root'
  72 +}
  73 +
64 class XlsFile(olefile.OleFileIO): 74 class XlsFile(olefile.OleFileIO):
65 """ specialization of an OLE compound file """ 75 """ specialization of an OLE compound file """
66 76
67 - def get_streams_recursive(self, storage=None):  
68 - """ find all streams in all storages, depth-first """  
69 - if storage is None:  
70 - storage = self 77 + def get_streams(self):
  78 + """ find all streams, including orphans """
71 print('Finding streams in ole file') 79 print('Finding streams in ole file')
72 - for st in storage.listdir(streams=True, storages=True):  
73 - st_type = self.get_type(st)  
74 - if st_type == olefile.STGTY_STREAM: # a stream --> yield  
75 - print('Checking stream {0}'.format(st))  
76 - yield st  
77 - elif st_type == olefile.STGTY_STORAGE: # a storage --> recurse  
78 - print('Recurse into storage {0}'.format(st))  
79 - for entry in self.get_streams_recursive(st):  
80 - yield entry  
81 - else:  
82 - raise ValueError('unexpected type {0} for entry {1}'  
83 - .format(st_type, st)) 80 +
  81 + for sid, direntry in enumerate(self.direntries):
  82 + is_orphan = direntry is None
  83 + if is_orphan:
  84 + # this direntry is not part of the tree: either unused or an orphan
  85 + direntry = self._load_direntry(sid)
  86 + is_stream = direntry.entry_type == olefile.STGTY_STREAM
  87 + print('direntry {:2d} {}: {}'
  88 + .format(sid, '[orphan]' if is_orphan else direntry.name,
  89 + 'is stream of size {}'.format(direntry.size)
  90 + if is_stream else
  91 + 'no stream ({})'
  92 + .format(entry_type2str[direntry.entry_type])))
  93 + if is_stream:
  94 + yield XlsStream(self._open(direntry.isectStart, direntry.size))
84 95
85 96
86 class XlsStream: 97 class XlsStream:
87 """ specialization of an OLE (sub-)stream """ 98 """ specialization of an OLE (sub-)stream """
88 - pass 99 +
  100 + def __init__(self, stream):
  101 + self.stream = stream
89 102
90 103
91 def test(filename): 104 def test(filename):
92 """ parse given file and print rough structure """ 105 """ parse given file and print rough structure """
93 - pass 106 + try:
  107 + xls = XlsFile(filename)
  108 + except Exception as exc:
  109 + print('{}: {}'.format(filename, exc))
  110 + return
  111 +
  112 + for stream in xls.get_streams():
  113 + pass
94 114
95 if __name__ == '__main__': 115 if __name__ == '__main__':
96 """ parse all given file names and print rough structure """ 116 """ parse all given file names and print rough structure """