Commit a2c8fb9bcd189d6b9ed9c5f3a1c5cd2edf5b290e

Authored by Christian Herdtweck
Committed by Philippe Lagadec
1 parent 014d3b47

Start creating parser for xls files e.g. for finding dde links

Showing 1 changed file with 98 additions and 0 deletions
oletools/xls_parser.py 0 → 100644
  1 +""" Parse xls up to some point
  2 +
  3 +Read storages, (sub-)streams, records from xls file
  4 +"""
  5 +#
  6 +# === LICENSE ==================================================================
  7 +
  8 +# xls_parser is copyright (c) 2014-2017 Philippe Lagadec (http://www.decalage.info)
  9 +# All rights reserved.
  10 +#
  11 +# Redistribution and use in source and binary forms, with or without modification,
  12 +# are permitted provided that the following conditions are met:
  13 +#
  14 +# * Redistributions of source code must retain the above copyright notice, this
  15 +# list of conditions and the following disclaimer.
  16 +# * Redistributions in binary form must reproduce the above copyright notice,
  17 +# this list of conditions and the following disclaimer in the documentation
  18 +# and/or other materials provided with the distribution.
  19 +#
  20 +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
  21 +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  22 +# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  23 +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
  24 +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  25 +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  26 +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  27 +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  28 +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  29 +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  30 +
  31 +from __future__ import print_function
  32 +
  33 +#------------------------------------------------------------------------------
  34 +# CHANGELOG:
  35 +# 2017-11-02 v0.01 CH: - first version
  36 +
  37 +__version__ = '0.1'
  38 +
  39 +#------------------------------------------------------------------------------
  40 +# TODO:
  41 +# everything
  42 +#
  43 +#------------------------------------------------------------------------------
  44 +# REFERENCES:
  45 +# - [MS-XLS]: Excel Binary File Format (.xls) Structure Specification
  46 +# https://msdn.microsoft.com/en-us/library/office/cc313154(v=office.14).aspx
  47 +# - Understanding the Excel .xls Binary File Format
  48 +# https://msdn.microsoft.com/en-us/library/office/gg615597(v=office.14).aspx
  49 +#
  50 +#--- IMPORTS ------------------------------------------------------------------
  51 +
import os
import sys

# Little hack to allow absolute imports even if oletools is not installed
# (copied from olevba.py): make sure the parent of the directory holding this
# module is on sys.path. This needs `os`, which therefore has to be imported
# above -- without that import the module fails with a NameError when loaded.
_thismodule_dir = os.path.normpath(os.path.abspath(os.path.dirname(__file__)))
_parent_dir = os.path.normpath(os.path.join(_thismodule_dir, '..'))
if _parent_dir not in sys.path:
    # insert at the front so this copy takes precedence over other entries
    sys.path.insert(0, _parent_dir)
  60 +
  61 +from oletools.thirdparty import olefile
  62 +
  63 +
class XlsFile(olefile.OleFileIO):
    """ specialization of an OLE compound file """

    def get_streams_recursive(self, storage=None):
        """ find all streams in all storages

        `listdir` of the base class already returns every entry of the file
        as a complete path (list of names), including entries nested in
        sub-storages. The entries themselves are plain lists and have no
        `listdir` of their own, so instead of recursing into them this walks
        the flat listing once and filters by the requested storage.

        :param storage: restrict the search to entries below this storage.
                        `None` (or the file object itself) means the whole
                        file; otherwise a list of path components as returned
                        by `listdir`, or a '/'-separated path string.
        :returns: generator over stream paths (lists of names), in the order
                  in which `listdir` reports them
        :raises ValueError: if an entry is neither a stream nor a storage
        """
        if storage is None or storage is self:
            prefix = []
            print('Finding streams in ole file')
        elif hasattr(storage, 'split'):      # path given as a string
            prefix = [part for part in storage.split('/') if part]
        else:                                # path given as list of names
            prefix = list(storage)

        # compare names case-insensitively, like olefile does for lookups
        prefix = [name.lower() for name in prefix]
        depth = len(prefix)

        for entry in self.listdir(streams=True, storages=True):
            # skip the storage itself and everything outside of it
            if len(entry) <= depth:
                continue
            if [name.lower() for name in entry[:depth]] != prefix:
                continue

            entry_type = self.get_type(entry)
            if entry_type == olefile.STGTY_STREAM:       # a stream --> yield
                print('Checking stream {0}'.format(entry))
                yield entry
            elif entry_type == olefile.STGTY_STORAGE:
                # a storage --> its contents follow as separate entries
                print('Recurse into storage {0}'.format(entry))
            else:
                raise ValueError('unexpected type {0} for entry {1}'
                                 .format(entry_type, entry))
  84 +
  85 +
class XlsStream(object):
    """ specialization of an OLE (sub-)stream

    Placeholder without any functionality so far. Derives explicitly from
    `object` so that it is a new-style class under Python 2 as well.
    """
  89 +
  90 +
def test(filename):
    """ placeholder for parsing the given file and printing its rough structure

    Not implemented yet: `filename` is ignored and `None` is returned.
    """
    return None
  94 +
if __name__ == '__main__':
    # Script entry point: hand every file name given on the command line
    # to test(), one after the other.
    for cmdline_arg in sys.argv[1:]:
        test(cmdline_arg)
... ...