rtfobj: extract OLE 1.0 objects and files from OLE Package objects, improved CLI options and logging. Added new module oleobj to parse OLE structures.

rtfobj: extract OLE 1.0 objects and files from OLE Package objects, improved CLI options and logging. Added new module oleobj to parse OLE structures.
Philippe Lagadec
1 parent 43f6a95c
Showing 2 changed files with 554 additions and 15 deletions
oletools/oleobj.py
oletools/rtfobj.py
+#!/usr/bin/env python
+"""
+oleobj.py
+
+oleobj is a Python script and module to parse OLE objects and files stored
+into various file formats such as RTF or MS Office documents (e.g. Word, Excel).
+
+Author: Philippe Lagadec - http://www.decalage.info
+License: BSD, see source code or documentation
+
+oleobj is part of the python-oletools package:
+http://www.decalage.info/python/oletools
+"""
+
+# === LICENSE ==================================================================
+
+# oleobj is copyright (c) 2015 Philippe Lagadec (http://www.decalage.info)
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without modification,
+# are permitted provided that the following conditions are met:
+#
+#  * Redistributions of source code must retain the above copyright notice, this
+#    list of conditions and the following disclaimer.
+#  * Redistributions in binary form must reproduce the above copyright notice,
+#    this list of conditions and the following disclaimer in the documentation
+#    and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+#------------------------------------------------------------------------------
+# CHANGELOG:
+# 2015-12-05 v0.01 PL: - first version
+
+__version__ = '0.01'
+
+#------------------------------------------------------------------------------
+# TODO:
+# + setup logging (common with other oletools)
+
+
+#------------------------------------------------------------------------------
+# REFERENCES:
+
+# Reference for the storage of embedded OLE objects/files:
+# [MS-OLEDS]: Object Linking and Embedding (OLE) Data Structures
+# https://msdn.microsoft.com/en-us/library/dd942265.aspx
+
+# - officeparser: https://github.com/unixfreak0037/officeparser
+# TODO: oledump
+
+
+#--- IMPORTS ------------------------------------------------------------------
+
+import logging, struct
+
+
+# === LOGGING =================================================================
+
+class NullHandler(logging.Handler):
+    """
+    Logging handler that silently discards every record it receives.
+
+    It is attached to the module logger so that nothing is printed when the
+    main application has not configured logging itself.
+    Python 2.7 provides logging.NullHandler, but Python 2.6 does not:
+    see https://docs.python.org/2.6/library/logging.html#configuring-logging-for-a-library
+    """
+    def emit(self, record):
+        # deliberately ignore the record
+        return None
+
+def get_logger(name, level=logging.CRITICAL+1):
+    """
+    Return a logger dedicated to this module, without touching the root
+    logger (so that logs from other modules do not end up on the screen).
+
+    A logger that already exists under the same name is reused as is; only a
+    newly created logger receives a NullHandler. Adding a handler each time
+    would result in duplicate handlers and doubled messages.
+
+    :param name: str, name of the logger
+    :param level: int, logging level applied to the logger. The default,
+                  CRITICAL+1, disables any logging.
+    :return: logging.Logger object
+    """
+    # The lookup must happen BEFORE getLogger(), which registers the name:
+    already_registered = name in logging.Logger.manager.loggerDict
+    logger = logging.getLogger(name)
+    if not already_registered:
+        # brand new logger: only add a NullHandler, it is up to the
+        # application to configure its own logging
+        logger.addHandler(NullHandler())
+    # in both cases, make sure the level is the requested one:
+    logger.setLevel(level)
+    return logger
+
+# a global logger object used for debugging:
+log = get_logger('oleobj')
+
+
+# === GLOBAL VARIABLES =======================================================
+
+# struct to parse an unsigned integer of 32 bits:
+struct_uint32 = struct.Struct('<L')
+assert struct_uint32.size == 4  # make sure it matches 4 bytes
+
+# struct to parse an unsigned integer of 16 bits:
+struct_uint16 = struct.Struct('<H')
+assert struct_uint16.size == 2  # make sure it matches 2 bytes
+
+
+# === FUNCTIONS ==============================================================
+
+def read_uint32(data):
+    """
+    Decode a little-endian unsigned 32-bit integer from the start of data.
+
+    :param data: bytes string containing the data to be extracted.
+    :return: tuple (value, new_data) containing the decoded value (int),
+             and the remaining data after the 4 bytes consumed.
+    """
+    head, tail = data[:4], data[4:]
+    (value,) = struct_uint32.unpack(head)
+    return value, tail
+
+
+def read_uint16(data):
+    """
+    Decode a little-endian unsigned 16-bit integer from the start of data.
+
+    :param data: bytes string containing the data to be extracted.
+    :return: tuple (value, new_data) containing the decoded value (int),
+             and the remaining data after the 2 bytes consumed.
+    """
+    head, tail = data[:2], data[2:]
+    (value,) = struct_uint16.unpack(head)
+    return value, tail
+
+
+def read_LengthPrefixedAnsiString(data):
+    """
+    Read a length-prefixed ANSI string from data.
+
+    The string is stored as an unsigned 32-bit length followed by that many
+    bytes. The length counts the terminating null character, which is the
+    last byte of the string field.
+
+    :param data: bytes string containing the data to be extracted.
+    :return: tuple (value, new_data) containing the read value (bytes string,
+             without its terminating null character), and the new data
+             without the bytes read.
+    :raises AssertionError: if the string is truncated or is not terminated
+             by a null character.
+    """
+    length, data = read_uint32(data)
+    # if length = 0, return a null string (no null character)
+    if length == 0:
+        return ('', data)
+    # extract the string without the last null character
+    ansi_string = data[:length-1]
+    # TODO: only in strict mode:
+    # check the presence of the null char. It is the LAST byte of the string
+    # field, i.e. at index length-1 (index length is the first byte of the
+    # next field). A slice is used instead of an index so that truncated data
+    # fails the check instead of raising IndexError:
+    assert data[length-1:length] == '\x00', \
+        'length-prefixed string is not null-terminated'
+    new_data = data[length:]
+    return (ansi_string, new_data)
+
+
+# === CLASSES ================================================================
+
+class OleNativeStream (object):
+    """
+    OLE object stored in an OLENativeStream structure.
+    (see MS-OLEDS 2.3.6 OLENativeStream)
+    """
+    # constants for the type attribute:
+    # see MS-OLEDS 2.2.4 ObjectHeader
+    TYPE_LINKED = 0x01
+    TYPE_EMBEDDED = 0x02
+
+
+    def __init__(self, bindata=None):
+        """
+        Constructor for OleNativeStream.
+        Every attribute starts as None; when bindata is given, it is passed
+        to parse() straight away.
+
+        :param bindata: bytes, OLENativeStream structure containing an OLE object
+        """
+        self.filename = None
+        self.src_path = None
+        self.unknown_short = None
+        self.unknown_long_1 = None
+        self.unknown_long_2 = None
+        self.temp_path = None
+        self.actual_size = None
+        self.data = None
+        if bindata is None:
+            return
+        self.parse(data=bindata)
+
+    def parse(self, data):
+        """
+        Decode an OLENativeStream structure and store its fields as
+        attributes of this object.
+        (see MS-OLEDS 2.3.6 OLENativeStream)
+
+        Fields are consumed in order: a 16-bit value, two null-terminated
+        strings (filename, source path), two 32-bit values, one more
+        null-terminated string (temp path?), a 32-bit size, then the data.
+
+        :param data: bytes, OLENativeStream structure containing an OLE object
+        :return:
+        """
+        # TODO: strict mode to raise exceptions when values are incorrect
+        # (permissive mode by default)
+        # NOTE: no leading 32-bit native data size is read here, parsing
+        # starts directly with the 16-bit value below.
+        # I thought this might be an OLE type specifier ???
+        self.unknown_short, remaining = read_uint16(data)
+        self.filename, remaining = remaining.split('\x00', 1)
+        # source path
+        self.src_path, remaining = remaining.split('\x00', 1)
+        # TODO I bet these next 8 bytes are a timestamp => FILETIME from olefile
+        self.unknown_long_1, remaining = read_uint32(remaining)
+        self.unknown_long_2, remaining = read_uint32(remaining)
+        # temp path?
+        self.temp_path, remaining = remaining.split('\x00', 1)
+        # size of the rest of the data
+        self.actual_size, remaining = read_uint32(remaining)
+        self.data = remaining[:self.actual_size]
+        # TODO: exception when size > remaining data
+        # TODO: SLACK DATA
+
+
+class OleObject (object):
+    """
+    OLE 1.0 Object
+
+    see MS-OLEDS 2.2 OLE1.0 Format Structures
+    """
+
+    # constants for the format_id attribute:
+    # see MS-OLEDS 2.2.4 ObjectHeader
+    TYPE_LINKED = 0x01
+    TYPE_EMBEDDED = 0x02
+
+
+    def __init__(self, bindata=None):
+        """
+        Constructor for OleObject.
+        If bindata is provided, it will be parsed using the parse() method.
+
+        :param bindata: bytes, OLE 1.0 Object structure containing an OLE object
+        """
+        self.ole_version = None
+        self.format_id = None
+        self.class_name = None
+        self.topic_name = None
+        self.item_name = None
+        self.data = None
+        self.data_size = None
+        # data found after the declared object data (embedded objects only).
+        # Initialised here so that the attribute always exists, even for
+        # linked objects or before parse() is called:
+        self.extra_data = None
+        if bindata is not None:
+            self.parse(data=bindata)
+
+    def parse(self, data):
+        """
+        Parse binary data containing an OLE 1.0 Object structure,
+        to extract the OLE object it contains.
+        (see MS-OLEDS 2.2 OLE1.0 Format Structures)
+
+        The header (version, format ID, class/topic/item names) is always
+        read. The data, data_size and extra_data attributes are only filled
+        in for embedded objects.
+
+        :param data: bytes, OLE 1.0 Object structure containing an OLE object
+        :return:
+        :raises AssertionError: if the format ID is neither embedded nor
+            linked, or if less data is available than the declared size.
+        """
+        # Header: see MS-OLEDS 2.2.4 ObjectHeader
+        self.ole_version, data = read_uint32(data)
+        self.format_id, data = read_uint32(data)
+        log.debug('OLE version=%08X - Format ID=%08X',
+                  self.ole_version, self.format_id)
+        assert self.format_id in (self.TYPE_EMBEDDED, self.TYPE_LINKED)
+        self.class_name, data = read_LengthPrefixedAnsiString(data)
+        self.topic_name, data = read_LengthPrefixedAnsiString(data)
+        self.item_name, data = read_LengthPrefixedAnsiString(data)
+        log.debug('Class name=%r - Topic name=%r - Item name=%r',
+                  self.class_name, self.topic_name, self.item_name)
+        if self.format_id == self.TYPE_EMBEDDED:
+            # Embedded object: see MS-OLEDS 2.2.5 EmbeddedObject
+            #assert self.topic_name != '' and self.item_name != ''
+            self.data_size, data = read_uint32(data)
+            log.debug('Declared data size=%d - remaining size=%d',
+                      self.data_size, len(data))
+            # TODO: handle incorrect size to avoid exception
+            self.data = data[:self.data_size]
+            assert len(self.data) == self.data_size
+            self.extra_data = data[self.data_size:]
 #!/usr/bin/env python
 """
-rtfobj.py - Philippe Lagadec 2013-04-02
+rtfobj.py
  
 rtfobj is a Python module to extract embedded objects from RTF files, such as
 OLE objects. It can be used as a Python library or a command-line tool.
@@ -43,8 +43,11 @@ http://www.decalage.info/python/oletools
 # CHANGELOG:
 # 2012-11-09 v0.01 PL: - first version
 # 2013-04-02 v0.02 PL: - fixed bug in main
+# 2015-12-09 v0.03 PL: - configurable logging, CLI options
+#                      - extract OLE 1.0 objects
+#                      - extract files from OLE Package objects
  
-__version__ = '0.02'
+__version__ = '0.03'
  
 #------------------------------------------------------------------------------
 # TODO:
@@ -52,9 +55,55 @@ __version__ = '0.02'
 # - allow semicolon within hex, as found in  this sample:
 #   http://contagiodump.blogspot.nl/2011/10/sep-28-cve-2010-3333-manuscript-with.html
  
+
 #=== IMPORTS =================================================================
  
-import re, sys, string, binascii
+import re, sys, string, binascii, logging, optparse
+
+from thirdparty.xglob import xglob
+from oleobj import OleObject, OleNativeStream
+import oleobj
+
+# === LOGGING =================================================================
+
+class NullHandler(logging.Handler):
+    """
+    Logging handler that silently discards every record it receives.
+
+    It is attached to the module logger so that nothing is printed when the
+    main application has not configured logging itself.
+    Python 2.7 provides logging.NullHandler, but Python 2.6 does not:
+    see https://docs.python.org/2.6/library/logging.html#configuring-logging-for-a-library
+    """
+    def emit(self, record):
+        # deliberately ignore the record
+        return None
+
+def get_logger(name, level=logging.CRITICAL+1):
+    """
+    Return a logger dedicated to this module, without touching the root
+    logger (so that logs from other modules do not end up on the screen).
+
+    A logger that already exists under the same name is reused as is; only a
+    newly created logger receives a NullHandler. Adding a handler each time
+    would result in duplicate handlers and doubled messages.
+
+    :param name: str, name of the logger
+    :param level: int, logging level applied to the logger. The default,
+                  CRITICAL+1, disables any logging.
+    :return: logging.Logger object
+    """
+    # The lookup must happen BEFORE getLogger(), which registers the name:
+    already_registered = name in logging.Logger.manager.loggerDict
+    logger = logging.getLogger(name)
+    if not already_registered:
+        # brand new logger: only add a NullHandler, it is up to the
+        # application to configure its own logging
+        logger.addHandler(NullHandler())
+    # in both cases, make sure the level is the requested one:
+    logger.setLevel(level)
+    return logger
+
+# a global logger object used for debugging:
+log = get_logger('rtfobj')
  
  
 #=== CONSTANTS=================================================================
@@ -62,19 +111,47 @@ import re, sys, string, binascii
 # REGEX pattern to extract embedded OLE objects in hexadecimal format:
 # alphanum digit: [0-9A-Fa-f]
 # hex char = two alphanum digits: [0-9A-Fa-f]{2}
+HEX_CHAR = r'[0-9A-Fa-f]{2}'
 # several hex chars, at least 4: (?:[0-9A-Fa-f]{2}){4,}
+# + word boundaries
+HEX_CHARS_4orMORE = r'\b(?:' + HEX_CHAR + r'){4,}\b'
+# at least 1 hex char:
+HEX_CHARS_1orMORE = r'(?:' + HEX_CHAR + r')+'
+# at least 1 hex char, followed by whitespace or CR/LF:
+HEX_CHARS_1orMORE_WHITESPACES = r'(?:' + HEX_CHAR + r')+\s+'
+# + word boundaries around hex block
+# HEX_CHARS_1orMORE_WHITESPACES = r'\b(?:' + HEX_CHAR + r')+\b\s*'
+# at least one block of hex and whitespace chars, followed by closing curly bracket:
+# HEX_BLOCK_CURLY_BRACKET = r'(?:' + HEX_CHARS_1orMORE_WHITESPACES + r')+\}'
+PATTERN = r'(?:' + HEX_CHARS_1orMORE_WHITESPACES + r')*' + HEX_CHARS_1orMORE
+
 # at least 4 hex chars, followed by whitespace or CR/LF: (?:[0-9A-Fa-f]{2}){4,}\s*
-PATTERN = r'(?:(?:[0-9A-Fa-f]{2})+\s*)*(?:[0-9A-Fa-f]{2}){4,}'
+# PATTERN = r'(?:(?:[0-9A-Fa-f]{2})+\s*)*(?:[0-9A-Fa-f]{2}){4,}'
 # improved pattern, allowing semicolons within hex:
 #PATTERN = r'(?:(?:[0-9A-Fa-f]{2})+\s*)*(?:[0-9A-Fa-f]{2}){4,}'
  
 # a dummy translation table for str.translate, which does not change anything:
 TRANSTABLE_NOCHANGE = string.maketrans('', '')
  
+re_hexblock = re.compile(PATTERN)
+re_decimal = re.compile(r'\d+')
+
+re_delimiter = re.compile(r'[ \t\r\n\f\v]')
  
-#=== FUNCTIONS =================================================================
+DELIMITER = r'[ \t\r\n\f\v]'
+DELIMITERS_ZeroOrMore = r'[ \t\r\n\f\v]*'
+ANTISLASH_BIN = r'\\bin'
+# According to my tests, Word accepts up to 250 digits (leading zeroes)
+DECIMAL_GROUP = r'(\d{1,250})'
  
-def rtf_iter_objects (filename, min_size=32):
+re_delims_bin_decimal = re.compile(DELIMITERS_ZeroOrMore + ANTISLASH_BIN
+    + DECIMAL_GROUP + DELIMITER)
+re_delim_hexblock = re.compile(DELIMITER + PATTERN)
+
+
+#=== FUNCTIONS ===============================================================
+
+def rtf_iter_objects_old (filename, min_size=32):
     """
     Open a RTF file, extract each embedded object encoded in hexadecimal of
     size > min_size, yield the index of the object in the RTF file and its data
@@ -84,22 +161,197 @@ def rtf_iter_objects (filename, min_size=32):
     data = open(filename, 'rb').read()
     for m in re.finditer(PATTERN, data):
         found = m.group(0)
+        orig_len = len(found)
         # remove all whitespace and line feeds:
         #NOTE: with Python 2.6+, we could use None instead of TRANSTABLE_NOCHANGE
-        found = found.translate(TRANSTABLE_NOCHANGE, ' \t\r\n\f\v')
+        found = found.translate(TRANSTABLE_NOCHANGE, ' \t\r\n\f\v}')
         found = binascii.unhexlify(found)
         #print repr(found)
         if len(found)>min_size:
-            yield m.start(), found
+            yield m.start(), orig_len, found
+
+# TODO: backward-compatible API?
+
+
+def search_hex_block(data, pos=0, min_size=32, first=True):
+    """
+    Look for a block of hexadecimal characters in data.
+
+    :param data: str, RTF data to be scanned
+    :param pos: int, index in data where the search starts
+    :param min_size: int, not used yet by this function
+    :param first: bool, if True search for the first occurrence anywhere
+        after pos; if False only match a hex block located exactly at pos
+    :return: regex match object, or None if no hex block was found
+    """
+    if first:
+        # Search 1st occurrence of a hex block:
+        match = re_hexblock.search(data, pos=pos)
+    else:
+        # Match next occurrences of a hex block, from the current position only:
+        match = re_hexblock.match(data, pos=pos)
+    # hand the result back to the caller (it used to be silently dropped):
+    return match
+
+
+
+def rtf_iter_objects (data, min_size=32):
+    """
+    Scan the content of a RTF file and extract each embedded object encoded
+    in hexadecimal of size > min_size, including raw binary chunks inserted
+    with the "\\bin" control word.
+    This is an iterator.
+
+    :param data: str, content of the RTF file
+    :param min_size: int, hex blocks whose text is shorter than min_size
+        characters are skipped, and only objects whose decoded size is larger
+        than min_size bytes are yielded
+    :return: iterator yielding tuples (start, length, objdata): the index of
+        the object in data, the length of the text it spans in data, and the
+        object data in binary format
+    """
+    # Search 1st occurrence of a hex block:
+    match = re_hexblock.search(data)
+    while match is not None:
+        found = match.group(0)
+        # start index
+        start = match.start()
+        # current position
+        current = match.end()
+        if len(found) < min_size:
+            # hex text too short to be of interest, look for the next block
+            match = re_hexblock.search(data, pos=current)
+            continue
+        log.debug('Found hex block starting at %08X, end %08X', start, current)
+        # remove all whitespace and line feeds:
+        #NOTE: with Python 2.6+, we could use None instead of TRANSTABLE_NOCHANGE
+        found = found.translate(TRANSTABLE_NOCHANGE, ' \t\r\n\f\v')
+        # object data extracted from the RTF file
+        objdata = binascii.unhexlify(found)
+        # Detect the "\bin" control word, which is sometimes used for obfuscation:
+        bin_match = re_delims_bin_decimal.match(data, pos=current)
+        while bin_match is not None:
+            log.debug('Found \\bin block starting at %08X : %r',
+                      bin_match.start(), bin_match.group(0))
+            # extract the decimal integer following '\bin'
+            bin_len = int(bin_match.group(1))
+            log.debug('\\bin block length = %d', bin_len)
+            # The binary payload starts right AFTER the control word and its
+            # delimiter, not at the position where the control word begins:
+            bin_start = bin_match.end()
+            if bin_start+bin_len > len(data):
+                log.error('\\bin block length is larger than the remaining data')
+                # move the current index, ignore the \bin block
+                current = bin_start
+                break
+            # read that number of bytes:
+            objdata += data[bin_start:bin_start+bin_len]
+            current = bin_start + bin_len
+            # TODO: is Word limiting the \bin length to a number of digits?
+            log.debug('Current position = %08X', current)
+            # the object may continue with another hex block after the payload:
+            next_hex = re_delim_hexblock.match(data, pos=current)
+            if next_hex is not None:
+                log.debug('Found next hex block starting at %08X, end %08X',
+                          next_hex.start(), next_hex.end())
+                found = next_hex.group(0)
+                # remove all whitespace and line feeds:
+                #NOTE: with Python 2.6+, we could use None instead of TRANSTABLE_NOCHANGE
+                found = found.translate(TRANSTABLE_NOCHANGE, ' \t\r\n\f\v')
+                objdata += binascii.unhexlify(found)
+                current = next_hex.end()
+            bin_match = re_delims_bin_decimal.match(data, pos=current)
+
+        if len(objdata)>min_size:
+            yield start, current-start, objdata
+        # Search next occurrence of a hex block:
+        match = re_hexblock.search(data, pos=current)
+
+
+def process_file(container, filename, data):
+    """
+    Extract every object found in one RTF file and save each of them to disk,
+    next to the input file. Progress is printed on the console.
+
+    Each object is first saved as is ("<filename>_object_<index>.raw"). If it
+    can be parsed as an OLE 1.0 object, its data is saved again with an
+    extension chosen from the class name, and if it is an OLE Package the
+    file it contains is saved too.
+
+    :param container: container value yielded by xglob.iter_files
+        (not used by this function)
+    :param filename: str, name of the RTF file, also used as prefix for the
+        names of the output files
+    :param data: str, content of the file, or None to read it from filename
+    :return: None
+    """
+    # TODO: option to extract objects to files (false by default)
+    if data is None:
+        with open(filename, 'rb') as f:
+            data = f.read()
+    print '-'*79
+    print 'File: %r - %d bytes' % (filename, len(data))
+    for index, orig_len, objdata in rtf_iter_objects(data):
+        print 'found object size %d at index %08X - end %08X' % (len(objdata), index, index+orig_len)
+        fname = '%s_object_%08X.raw' % (filename, index)
+        print 'saving object to file %s' % fname
+        with open(fname, 'wb') as f:
+            f.write(objdata)
+        # TODO: check if all hex data is extracted properly
+
+        obj = OleObject()
+        try:
+            obj.parse(objdata)
+            print 'extract file embedded in OLE object:'
+            print 'format_id  = %d' % obj.format_id
+            print 'class name = %r' % obj.class_name
+            print 'data size  = %d' % obj.data_size
+            # set a file extension according to the class name:
+            class_name = obj.class_name.lower()
+            if class_name.startswith('word'):
+                ext = 'doc'
+            elif class_name.startswith('package'):
+                ext = 'package'
+            else:
+                ext = 'bin'
+            fname = '%s_object_%08X.%s' % (filename, index, ext)
+            print 'saving to file %s' % fname
+            with open(fname, 'wb') as f:
+                f.write(obj.data)
+            if class_name == 'package':
+                print 'Parsing OLE Package'
+                opkg = OleNativeStream(bindata=obj.data)
+                print 'Filename = %r' % opkg.filename
+                print 'Source path = %r' % opkg.src_path
+                print 'Temp path = %r' % opkg.temp_path
+                if opkg.filename:
+                    # The embedded filename comes from the (untrusted)
+                    # document: neutralise path separators and drive letters
+                    # so that the file cannot be written outside of the
+                    # expected location.
+                    safe_name = re.sub(r'[\\/:]', '_', opkg.filename)
+                    fname = '%s_%s' % (filename, safe_name)
+                else:
+                    fname = '%s_object_%08X.noname' % (filename, index)
+                print 'saving to file %s' % fname
+                with open(fname, 'wb') as f:
+                    f.write(opkg.data)
+        except Exception:
+            # Best effort: most hex blocks are not OLE 1.0 objects, so a
+            # failure here is expected and must not stop the extraction.
+            # Unlike a bare "except", this lets KeyboardInterrupt and
+            # SystemExit go through, and keeps a trace at debug level.
+            log.debug('Could not parse or save the data as an OLE 1.0 object',
+                      exc_info=True)
+
+
  
  
 #=== MAIN =================================================================
  
 if __name__ == '__main__':
-    if len(sys.argv)<2:
-        sys.exit(__doc__)
-    for index, data in rtf_iter_objects(sys.argv[1]):
-        print 'found object size %d at index %08X' % (len(data), index)
-        fname = 'object_%08X.bin' % index
-        print 'saving to file %s' % fname
-        open(fname, 'wb').write(data)
+    # Command-line entry point: parse the options, configure logging, then
+    # call process_file() on every file yielded by xglob.iter_files().
+
+    # print banner with version
+    print ('rtfobj %s - http://decalage.info/python/oletools' % __version__)
+    print ('THIS IS WORK IN PROGRESS - Check updates regularly!')
+    print ('Please report any issue at https://bitbucket.org/decalage/oletools/issues')
+    print ('')
+
+    DEFAULT_LOG_LEVEL = "warning" # Default log level
+    # mapping between the names accepted by --loglevel and the logging constants:
+    LOG_LEVELS = {'debug':    logging.DEBUG,
+              'info':     logging.INFO,
+              'warning':  logging.WARNING,
+              'error':    logging.ERROR,
+              'critical': logging.CRITICAL
+             }
+
+    usage = 'usage: %prog [options] <filename> [filename2 ...]'
+    parser = optparse.OptionParser(usage=usage)
+    # parser.add_option('-o', '--outfile', dest='outfile',
+    #     help='output file')
+    # parser.add_option('-c', '--csv', dest='csv',
+    #     help='export results to a CSV file')
+    parser.add_option("-r", action="store_true", dest="recursive",
+        help='find files recursively in subdirectories.')
+    parser.add_option("-z", "--zip", dest='zip_password', type='str', default=None,
+        help='if the file is a zip archive, open first file from it, using the provided password (requires Python 2.6+)')
+    parser.add_option("-f", "--zipfname", dest='zip_fname', type='str', default='*',
+        help='if the file is a zip archive, file(s) to be opened within the zip. Wildcards * and ? are supported. (default:*)')
+    parser.add_option('-l', '--loglevel', dest="loglevel", action="store", default=DEFAULT_LOG_LEVEL,
+                            help="logging level debug/info/warning/error/critical (default=%default)")
+
+    (options, args) = parser.parse_args()
+
+    # Print help if no arguments are passed
+    if len(args) == 0:
+        print __doc__
+        parser.print_help()
+        sys.exit()
+
+    # setup logging to the console
+    # NOTE(review): a --loglevel value that is not a key of LOG_LEVELS makes
+    # the dict lookup below raise KeyError.
+    logging.basicConfig(level=LOG_LEVELS[options.loglevel], format='%(levelname)-8s %(message)s')
+    # enable logging in the modules:
+    # (NOTSET replaces the CRITICAL+1 level set by get_logger; presumably the
+    # level passed to basicConfig above then decides what is displayed)
+    log.setLevel(logging.NOTSET)
+    oleobj.log.setLevel(logging.NOTSET)
+
+
+    for container, filename, data in xglob.iter_files(args, recursive=options.recursive,
+        zip_password=options.zip_password, zip_fname=options.zip_fname):
+        # ignore directory names stored in zip files:
+        if container and filename.endswith('/'):
+            continue
+        process_file(container, filename, data)
+
+
+
+