Commit 383ae415084903920c31ef787cb343b8572187e5

Authored by Philippe Lagadec
1 parent 43f6a95c

rtfobj: extract OLE 1.0 objects and files from OLE Package objects, improved CLI…

… options and logging. Added new module oleobj to parse OLE structures.
oletools/oleobj.py 0 → 100644
  1 +#!/usr/bin/env python
  2 +"""
  3 +oleobj.py
  4 +
  5 +oleobj is a Python script and module to parse OLE objects and files stored
  6 +into various file formats such as RTF or MS Office documents (e.g. Word, Excel).
  7 +
  8 +Author: Philippe Lagadec - http://www.decalage.info
  9 +License: BSD, see source code or documentation
  10 +
  11 +oleobj is part of the python-oletools package:
  12 +http://www.decalage.info/python/oletools
  13 +"""
  14 +
  15 +# === LICENSE ==================================================================
  16 +
  17 +# oleobj is copyright (c) 2015 Philippe Lagadec (http://www.decalage.info)
  18 +# All rights reserved.
  19 +#
  20 +# Redistribution and use in source and binary forms, with or without modification,
  21 +# are permitted provided that the following conditions are met:
  22 +#
  23 +# * Redistributions of source code must retain the above copyright notice, this
  24 +# list of conditions and the following disclaimer.
  25 +# * Redistributions in binary form must reproduce the above copyright notice,
  26 +# this list of conditions and the following disclaimer in the documentation
  27 +# and/or other materials provided with the distribution.
  28 +#
  29 +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
  30 +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  31 +# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  32 +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
  33 +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  34 +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  35 +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  36 +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  37 +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  38 +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  39 +
  40 +
  41 +#------------------------------------------------------------------------------
  42 +# CHANGELOG:
  43 +# 2015-12-05 v0.01 PL: - first version
  44 +
  45 +__version__ = '0.01'
  46 +
  47 +#------------------------------------------------------------------------------
  48 +# TODO:
  49 +# + setup logging (common with other oletools)
  50 +
  51 +
  52 +#------------------------------------------------------------------------------
  53 +# REFERENCES:
  54 +
  55 +# Reference for the storage of embedded OLE objects/files:
  56 +# [MS-OLEDS]: Object Linking and Embedding (OLE) Data Structures
  57 +# https://msdn.microsoft.com/en-us/library/dd942265.aspx
  58 +
  59 +# - officeparser: https://github.com/unixfreak0037/officeparser
  60 +# TODO: oledump
  61 +
  62 +
  63 +#--- IMPORTS ------------------------------------------------------------------
  64 +
  65 +import logging, struct
  66 +
  67 +
  68 +# === LOGGING =================================================================
  69 +
class NullHandler(logging.Handler):
    """
    Logging handler that silently discards every record it receives.

    It is attached to the module logger so that nothing is printed when the
    main application has not configured logging.
    Python 2.7 provides logging.NullHandler, but Python 2.6 does not:
    see https://docs.python.org/2.6/library/logging.html#configuring-logging-for-a-library
    """
    def emit(self, record):
        # deliberately do nothing with the record
        return None


def get_logger(name, level=logging.CRITICAL+1):
    """
    Return the logger called `name`, prepared for use by this module.

    The root logger is never touched, so that logs from other modules do not
    end up on the screen. A NullHandler is attached only the first time the
    name is seen: attaching one on every call would pile up duplicate
    handlers.

    :param name: str, name of the logger to create or reuse.
    :param level: int, logging level applied to the logger on every call.
        The default, CRITICAL+1, disables all output.
    :return: logging.Logger object.
    """
    # must be tested BEFORE getLogger, which registers the name:
    seen_before = name in logging.Logger.manager.loggerDict
    logger = logging.getLogger(name)
    if not seen_before:
        # it is up to the application to configure real handlers
        logger.addHandler(NullHandler())
    # the level is (re)applied whether the logger is new or reused:
    logger.setLevel(level)
    return logger
  105 +
  106 +# a global logger object used for debugging:
  107 +log = get_logger('oleobj')
  108 +
  109 +
  110 +# === GLOBAL VARIABLES =======================================================
  111 +
# struct to parse an unsigned integer of 32 bits (little endian):
struct_uint32 = struct.Struct('<L')
assert struct_uint32.size == 4  # make sure it matches 4 bytes

# struct to parse an unsigned integer of 16 bits (little endian):
struct_uint16 = struct.Struct('<H')
assert struct_uint16.size == 2  # make sure it matches 2 bytes


# === FUNCTIONS ==============================================================

def read_uint32(data):
    """
    Read a little-endian unsigned integer from the first 32 bits of data.

    :param data: bytes string containing the data to be extracted.
    :return: tuple (value, new_data) containing the read value (int),
        and the new data without the bytes read.
    :raise struct.error: if data holds fewer than 4 bytes.
    """
    value = struct_uint32.unpack(data[0:4])[0]
    new_data = data[4:]
    return (value, new_data)


def read_uint16(data):
    """
    Read a little-endian unsigned integer from the first 16 bits of data.

    :param data: bytes string containing the data to be extracted.
    :return: tuple (value, new_data) containing the read value (int),
        and the new data without the bytes read.
    :raise struct.error: if data holds fewer than 2 bytes.
    """
    value = struct_uint16.unpack(data[0:2])[0]
    new_data = data[2:]
    return (value, new_data)


def read_LengthPrefixedAnsiString(data):
    """
    Read a length-prefixed ANSI string from data.

    The string is stored as a 32-bit length followed by that many bytes, the
    last of which is a null terminator. A length of zero means an empty
    string with no terminator at all.

    :param data: bytes string containing the data to be extracted.
    :return: tuple (value, new_data) containing the read value (bytes string,
        without its null terminator), and the new data without the bytes read.
    :raise AssertionError: if the string is not null-terminated (which also
        covers data truncated before the end of the string).
    """
    length, data = read_uint32(data)
    # if length = 0, return a null string (no null character)
    if length == 0:
        return (b'', data)
    # extract the string without the last null character
    ansi_string = data[:length-1]
    # TODO: only in strict mode:
    # check the presence of the null char. The terminator is the LAST byte of
    # the string, i.e. index length-1 (index length is the first byte of the
    # next field). A slice is used instead of an index so that truncated data
    # fails the assertion rather than raising IndexError, and so that the
    # comparison is a bytes-to-bytes one.
    assert data[length-1:length] == b'\x00'
    new_data = data[length:]
    return (ansi_string, new_data)
  168 +
  169 +
  170 +# === CLASSES ================================================================
  171 +
class OleNativeStream (object):
    """
    OLE object stored in an OLENativeStream structure.
    (see MS-OLEDS 2.3.6 OLENativeStream)

    After parsing, the attributes filename, src_path and temp_path hold the
    three null-terminated strings of the structure, actual_size the declared
    size of the payload, and data the payload itself.
    """
    # constants for the type attribute:
    # see MS-OLEDS 2.2.4 ObjectHeader
    TYPE_LINKED = 0x01
    TYPE_EMBEDDED = 0x02

    def __init__(self, bindata=None):
        """
        Constructor for OleNativeStream.
        When bindata is given, it is parsed right away with parse().

        :param bindata: bytes, OLENativeStream structure containing an OLE object
        """
        # every field is undefined until parse() has run:
        for attr in ('filename', 'src_path', 'unknown_short',
                     'unknown_long_1', 'unknown_long_2', 'temp_path',
                     'actual_size', 'data'):
            setattr(self, attr, None)
        if bindata is not None:
            self.parse(data=bindata)

    def parse(self, data):
        """
        Parse binary data containing an OLENativeStream structure, and store
        the fields and the embedded object into the attributes of self.
        (see MS-OLEDS 2.3.6 OLENativeStream)

        Parsing starts directly at the 16-bit field: no leading 32-bit native
        data size is read by this method.

        :param data: bytes, OLENativeStream structure containing an OLE object
        :return: None
        """
        # TODO: strict mode to raise exceptions when values are incorrect
        # (permissive mode by default)
        # 16-bit field of unknown meaning (possibly an OLE type specifier?)
        self.unknown_short, data = read_uint16(data)
        # two null-terminated strings: the file name, then the source path
        for attr in ('filename', 'src_path'):
            text, data = data.split('\x00', 1)
            setattr(self, attr, text)
        # TODO: these 8 bytes may be a timestamp => FILETIME from olefile
        for attr in ('unknown_long_1', 'unknown_long_2'):
            number, data = read_uint32(data)
            setattr(self, attr, number)
        # third null-terminated string: probably a temporary path
        self.temp_path, data = data.split('\x00', 1)
        # declared size of the payload, followed by the payload itself
        self.actual_size, data = read_uint32(data)
        self.data = data[:self.actual_size]
        # TODO: exception when size > remaining data
        # TODO: SLACK DATA
  230 +
  231 +
class OleObject (object):
    """
    OLE 1.0 Object

    see MS-OLEDS 2.2 OLE1.0 Format Structures
    """

    # constants for the format_id attribute:
    # see MS-OLEDS 2.2.4 ObjectHeader
    TYPE_LINKED = 0x01
    TYPE_EMBEDDED = 0x02

    def __init__(self, bindata=None):
        """
        Constructor for OleObject.
        If bindata is provided, it will be parsed using the parse() method.

        :param bindata: bytes, OLE 1.0 Object structure containing an OLE object
        """
        self.ole_version = None
        self.format_id = None
        self.class_name = None
        self.topic_name = None
        self.item_name = None
        self.data = None
        self.data_size = None
        # bytes found after the embedded data; stays None for linked objects
        # and until parse() has been called:
        self.extra_data = None
        if bindata is not None:
            self.parse(bindata)

    def parse(self, data):
        """
        Parse binary data containing an OLE 1.0 Object structure,
        to extract the OLE object it contains.
        (see MS-OLEDS 2.2 OLE1.0 Format Structures)

        The header fields are stored in ole_version, format_id, class_name,
        topic_name and item_name. For an embedded object, data_size, data and
        extra_data (whatever follows the declared data) are set as well.

        :param data: bytes, OLE 1.0 Object structure containing an OLE object
        :return: None
        :raise AssertionError: if the format ID is neither embedded nor
            linked, or if less data is available than declared.
        """
        # Header: see MS-OLEDS 2.2.4 ObjectHeader
        self.ole_version, data = read_uint32(data)
        self.format_id, data = read_uint32(data)
        log.debug('OLE version=%08X - Format ID=%08X',
                  self.ole_version, self.format_id)
        assert self.format_id in (self.TYPE_EMBEDDED, self.TYPE_LINKED)
        self.class_name, data = read_LengthPrefixedAnsiString(data)
        self.topic_name, data = read_LengthPrefixedAnsiString(data)
        self.item_name, data = read_LengthPrefixedAnsiString(data)
        log.debug('Class name=%r - Topic name=%r - Item name=%r',
                  self.class_name, self.topic_name, self.item_name)
        if self.format_id == self.TYPE_EMBEDDED:
            # Embedded object: see MS-OLEDS 2.2.5 EmbeddedObject
            self.data_size, data = read_uint32(data)
            log.debug('Declared data size=%d - remaining size=%d',
                      self.data_size, len(data))
            # TODO: handle incorrect size to avoid exception
            self.data = data[:self.data_size]
            assert len(self.data) == self.data_size
            self.extra_data = data[self.data_size:]
... ...
oletools/rtfobj.py
1 1 #!/usr/bin/env python
2 2 """
3   -rtfobj.py - Philippe Lagadec 2013-04-02
  3 +rtfobj.py
4 4  
5 5 rtfobj is a Python module to extract embedded objects from RTF files, such as
6 6 OLE objects. It can be used as a Python library or a command-line tool.
... ... @@ -43,8 +43,11 @@ http://www.decalage.info/python/oletools
43 43 # CHANGELOG:
44 44 # 2012-11-09 v0.01 PL: - first version
45 45 # 2013-04-02 v0.02 PL: - fixed bug in main
  46 +# 2015-12-09 v0.03 PL: - configurable logging, CLI options
  47 +# - extract OLE 1.0 objects
  48 +# - extract files from OLE Package objects
46 49  
47   -__version__ = '0.02'
  50 +__version__ = '0.03'
48 51  
49 52 #------------------------------------------------------------------------------
50 53 # TODO:
... ... @@ -52,9 +55,55 @@ __version__ = &#39;0.02&#39;
52 55 # - allow semicolon within hex, as found in this sample:
53 56 # http://contagiodump.blogspot.nl/2011/10/sep-28-cve-2010-3333-manuscript-with.html
54 57  
  58 +
55 59 #=== IMPORTS =================================================================
56 60  
57   -import re, sys, string, binascii
  61 +import re, sys, string, binascii, logging, optparse
  62 +
  63 +from thirdparty.xglob import xglob
  64 +from oleobj import OleObject, OleNativeStream
  65 +import oleobj
  66 +
  67 +# === LOGGING =================================================================
  68 +
class NullHandler(logging.Handler):
    """
    Logging handler that silently discards every record it receives.

    It is attached to the module logger so that nothing is printed when the
    main application has not configured logging.
    Python 2.7 provides logging.NullHandler, but Python 2.6 does not:
    see https://docs.python.org/2.6/library/logging.html#configuring-logging-for-a-library
    """
    def emit(self, record):
        # deliberately do nothing with the record
        return None


def get_logger(name, level=logging.CRITICAL+1):
    """
    Return the logger called `name`, prepared for use by this module.

    The root logger is never touched, so that logs from other modules do not
    end up on the screen. A NullHandler is attached only the first time the
    name is seen: attaching one on every call would pile up duplicate
    handlers.

    :param name: str, name of the logger to create or reuse.
    :param level: int, logging level applied to the logger on every call.
        The default, CRITICAL+1, disables all output.
    :return: logging.Logger object.
    """
    # must be tested BEFORE getLogger, which registers the name:
    seen_before = name in logging.Logger.manager.loggerDict
    logger = logging.getLogger(name)
    if not seen_before:
        # it is up to the application to configure real handlers
        logger.addHandler(NullHandler())
    # the level is (re)applied whether the logger is new or reused:
    logger.setLevel(level)
    return logger
  104 +
  105 +# a global logger object used for debugging:
  106 +log = get_logger('rtfobj')
58 107  
59 108  
60 109 #=== CONSTANTS=================================================================
... ... @@ -62,19 +111,47 @@ import re, sys, string, binascii
62 111 # REGEX pattern to extract embedded OLE objects in hexadecimal format:
63 112 # alphanum digit: [0-9A-Fa-f]
64 113 # hex char = two alphanum digits: [0-9A-Fa-f]{2}
  114 +HEX_CHAR = r'[0-9A-Fa-f]{2}'
65 115 # several hex chars, at least 4: (?:[0-9A-Fa-f]{2}){4,}
  116 +# + word boundaries
  117 +HEX_CHARS_4orMORE = r'\b(?:' + HEX_CHAR + r'){4,}\b'
  118 +# at least 1 hex char:
  119 +HEX_CHARS_1orMORE = r'(?:' + HEX_CHAR + r')+'
  120 +# at least 1 hex char, followed by whitespace or CR/LF:
  121 +HEX_CHARS_1orMORE_WHITESPACES = r'(?:' + HEX_CHAR + r')+\s+'
  122 +# + word boundaries around hex block
  123 +# HEX_CHARS_1orMORE_WHITESPACES = r'\b(?:' + HEX_CHAR + r')+\b\s*'
  124 +# at least one block of hex and whitespace chars, followed by closing curly bracket:
  125 +# HEX_BLOCK_CURLY_BRACKET = r'(?:' + HEX_CHARS_1orMORE_WHITESPACES + r')+\}'
  126 +PATTERN = r'(?:' + HEX_CHARS_1orMORE_WHITESPACES + r')*' + HEX_CHARS_1orMORE
  127 +
66 128 # at least 4 hex chars, followed by whitespace or CR/LF: (?:[0-9A-Fa-f]{2}){4,}\s*
67   -PATTERN = r'(?:(?:[0-9A-Fa-f]{2})+\s*)*(?:[0-9A-Fa-f]{2}){4,}'
  129 +# PATTERN = r'(?:(?:[0-9A-Fa-f]{2})+\s*)*(?:[0-9A-Fa-f]{2}){4,}'
68 130 # improved pattern, allowing semicolons within hex:
69 131 #PATTERN = r'(?:(?:[0-9A-Fa-f]{2})+\s*)*(?:[0-9A-Fa-f]{2}){4,}'
70 132  
71 133 # a dummy translation table for str.translate, which does not change anything:
72 134 TRANSTABLE_NOCHANGE = string.maketrans('', '')
73 135  
  136 +re_hexblock = re.compile(PATTERN)
  137 +re_decimal = re.compile(r'\d+')
  138 +
  139 +re_delimiter = re.compile(r'[ \t\r\n\f\v]')
74 140  
75   -#=== FUNCTIONS =================================================================
  141 +DELIMITER = r'[ \t\r\n\f\v]'
  142 +DELIMITERS_ZeroOrMore = r'[ \t\r\n\f\v]*'
  143 +ANTISLASH_BIN = r'\\bin'
  144 +# According to my tests, Word accepts up to 250 digits (leading zeroes)
  145 +DECIMAL_GROUP = r'(\d{1,250})'
76 146  
77   -def rtf_iter_objects (filename, min_size=32):
  147 +re_delims_bin_decimal = re.compile(DELIMITERS_ZeroOrMore + ANTISLASH_BIN
  148 + + DECIMAL_GROUP + DELIMITER)
  149 +re_delim_hexblock = re.compile(DELIMITER + PATTERN)
  150 +
  151 +
  152 +#=== FUNCTIONS ===============================================================
  153 +
  154 +def rtf_iter_objects_old (filename, min_size=32):
78 155 """
79 156 Open a RTF file, extract each embedded object encoded in hexadecimal of
80 157 size > min_size, yield the index of the object in the RTF file and its data
... ... @@ -84,22 +161,197 @@ def rtf_iter_objects (filename, min_size=32):
84 161 data = open(filename, 'rb').read()
85 162 for m in re.finditer(PATTERN, data):
86 163 found = m.group(0)
  164 + orig_len = len(found)
87 165 # remove all whitespace and line feeds:
88 166 #NOTE: with Python 2.6+, we could use None instead of TRANSTABLE_NOCHANGE
89   - found = found.translate(TRANSTABLE_NOCHANGE, ' \t\r\n\f\v')
  167 + found = found.translate(TRANSTABLE_NOCHANGE, ' \t\r\n\f\v}')
90 168 found = binascii.unhexlify(found)
91 169 #print repr(found)
92 170 if len(found)>min_size:
93   - yield m.start(), found
  171 + yield m.start(), orig_len, found
  172 +
  173 +# TODO: backward-compatible API?
  174 +
  175 +
def search_hex_block(data, pos=0, min_size=32, first=True):
    """
    Look for a block of hexadecimal characters in data, using re_hexblock.

    :param data: str, RTF data to be searched.
    :param pos: int, index in data where the search starts.
    :param min_size: int, currently unused; kept so that the signature does
        not change for callers.
    :param first: bool. If True, search for the first hex block located at or
        after pos. If False, only accept a hex block starting exactly at pos.
    :return: the regex match object, or None if no hex block was found.
    """
    if first:
        # Search 1st occurrence of a hex block:
        match = re_hexblock.search(data, pos=pos)
    else:
        # Match next occurrences of a hex block, from the current position only:
        match = re_hexblock.match(data, pos=pos)
    # the result used to be computed and then dropped, so the function always
    # returned None:
    return match
  183 +
  184 +
  185 +
def rtf_iter_objects (data, min_size=32):
    """
    Extract each embedded object encoded in hexadecimal from RTF data.

    An object is a hex block (pairs of hex digits, possibly separated by
    whitespace), optionally continued by "\\binN" control words followed by N
    raw bytes, and by further hex blocks. All these pieces are concatenated
    into one object.
    This is an iterator.

    :param data: str, content of the RTF file (not a filename).
    :param min_size: int. A first hex block whose text is shorter than
        min_size characters is skipped, and only objects whose decoded size is
        larger than min_size bytes are yielded.
    :return: yields tuples (start, length, objdata): index of the object in
        data, length of the text it spans in data, and decoded binary object.
    """
    # Search 1st occurrence of a hex block:
    match = re_hexblock.search(data)
    while match is not None:
        found = match.group(0)
        # start index of the object in data
        start = match.start()
        # current position: index of the first character not consumed yet
        current = match.end()
        if len(found) < min_size:
            match = re_hexblock.search(data, pos=current)
            continue
        log.debug('Found hex block starting at %08X, end %08X', start, current)
        # remove all whitespace and line feeds:
        #NOTE: with Python 2.6+, we could use None instead of TRANSTABLE_NOCHANGE
        found = found.translate(TRANSTABLE_NOCHANGE, ' \t\r\n\f\v')
        # object data extracted from the RTF file
        objdata = binascii.unhexlify(found)
        # Detect the "\bin" control word, which is sometimes used for obfuscation:
        bin_match = re_delims_bin_decimal.match(data, pos=current)
        while bin_match is not None:
            log.debug('Found \\bin block starting at %08X : %r',
                      bin_match.start(), bin_match.group(0))
            # extract the decimal integer following '\bin'
            bin_len = int(bin_match.group(1))
            log.debug('\\bin block length = %d', bin_len)
            # The raw bytes start right AFTER the control word and its
            # delimiter. Reading them from `current` (the start of the control
            # word) would copy the text "\binN " itself into the object.
            bin_start = bin_match.end()
            if bin_start + bin_len > len(data):
                log.error('\\bin block length is larger than the remaining data')
                # move the current index, ignore the \bin block
                current = bin_start
                break
            # read that number of bytes:
            objdata += data[bin_start:bin_start+bin_len]
            current = bin_start + bin_len
            # TODO: is Word limiting the \bin length to a number of digits?
            log.debug('Current position = %08X', current)
            # a hex block may follow the binary data, after one delimiter:
            match = re_delim_hexblock.match(data, pos=current)
            if match is not None:
                log.debug('Found next hex block starting at %08X, end %08X',
                          match.start(), match.end())
                found = match.group(0)
                # remove all whitespace and line feeds:
                found = found.translate(TRANSTABLE_NOCHANGE, ' \t\r\n\f\v')
                objdata += binascii.unhexlify(found)
                current = match.end()
            # and yet another \bin block may follow:
            bin_match = re_delims_bin_decimal.match(data, pos=current)

        if len(objdata)>min_size:
            yield start, current-start, objdata
        # Search next occurrence of a hex block:
        match = re_hexblock.search(data, pos=current)
  250 +
  251 +
def _sanitize_filename(name):
    """Reduce an untrusted file name to a harmless base name (may be empty)."""
    # keep only the last path component, whichever separator is used:
    name = name.replace('\\', '/').split('/')[-1]
    # replace anything unusual, then drop leading dots ('..', hidden files):
    return re.sub(r'[^A-Za-z0-9._ -]', '_', name).lstrip('.')


def _write_file(fname, content):
    """Write content to the file fname, making sure the file gets closed."""
    with open(fname, 'wb') as f:
        f.write(content)


def process_file(container, filename, data):
    """
    Extract all the objects found in one RTF file and save them to disk.

    Each object is saved as "<filename>_object_<index>.raw". If it can be
    parsed as an OLE 1.0 object with embedded data, that data is saved too,
    and if it is an OLE Package, the file stored in the package is saved as
    well. All the output files are created next to the input file.

    :param container: name of the container of the file (e.g. a zip archive),
        as provided by xglob.iter_files. Not used here.
    :param filename: str, name of the RTF file, used to build output names.
    :param data: str, content of the file, or None to read it from filename.
    :return: None
    """
    # TODO: option to extract objects to files (false by default)
    if data is None:
        with open(filename, 'rb') as f:
            data = f.read()
    print('-'*79)
    print('File: %r - %d bytes' % (filename, len(data)))
    for index, orig_len, objdata in rtf_iter_objects(data):
        print('found object size %d at index %08X - end %08X' % (len(objdata), index, index+orig_len))
        fname = '%s_object_%08X.raw' % (filename, index)
        print('saving object to file %s' % fname)
        _write_file(fname, objdata)
        # TODO: check if all hex data is extracted properly

        obj = OleObject()
        try:
            obj.parse(objdata)
        except Exception:
            # most hex blocks are not OLE objects: this is expected, so it is
            # only reported at debug level
            log.debug('*** Not an OLE 1.0 Object', exc_info=True)
            continue
        try:
            print('extract file embedded in OLE object:')
            print('format_id = %d' % obj.format_id)
            print('class name = %r' % obj.class_name)
            if obj.data is None:
                # linked object: there is no embedded data to extract
                log.info('Linked OLE object, no embedded data to extract')
                continue
            print('data size = %d' % obj.data_size)
            # set a file extension according to the class name:
            class_name = obj.class_name.lower()
            if class_name.startswith('word'):
                ext = 'doc'
            elif class_name.startswith('package'):
                ext = 'package'
            else:
                ext = 'bin'
            fname = '%s_object_%08X.%s' % (filename, index, ext)
            print('saving to file %s' % fname)
            _write_file(fname, obj.data)
            if class_name == 'package':
                print('Parsing OLE Package')
                opkg = OleNativeStream(bindata=obj.data)
                print('Filename = %r' % opkg.filename)
                print('Source path = %r' % opkg.src_path)
                print('Temp path = %r' % opkg.temp_path)
                # SECURITY: the file name comes from the analysed (untrusted)
                # document, it must not be able to point to another directory
                safe_name = _sanitize_filename(opkg.filename or '')
                if safe_name:
                    fname = '%s_%s' % (filename, safe_name)
                else:
                    fname = '%s_object_%08X.noname' % (filename, index)
                print('saving to file %s' % fname)
                _write_file(fname, opkg.data)
        except Exception:
            # best effort: report the problem and go on with the next object
            log.exception('Error while extracting the OLE object at index %08X', index)
  298 +
94 299  
95 300  
96 301 #=== MAIN =================================================================
97 302  
if __name__ == '__main__':
    # print banner with version
    print('rtfobj %s - http://decalage.info/python/oletools' % __version__)
    print('THIS IS WORK IN PROGRESS - Check updates regularly!')
    print('Please report any issue at https://bitbucket.org/decalage/oletools/issues')
    print('')

    DEFAULT_LOG_LEVEL = "warning" # Default log level
    LOG_LEVELS = {'debug': logging.DEBUG,
                  'info': logging.INFO,
                  'warning': logging.WARNING,
                  'error': logging.ERROR,
                  'critical': logging.CRITICAL
                  }

    usage = 'usage: %prog [options] <filename> [filename2 ...]'
    parser = optparse.OptionParser(usage=usage)
    parser.add_option("-r", action="store_true", dest="recursive",
        help='find files recursively in subdirectories.')
    parser.add_option("-z", "--zip", dest='zip_password', type='str', default=None,
        help='if the file is a zip archive, open first file from it, using the provided password (requires Python 2.6+)')
    parser.add_option("-f", "--zipfname", dest='zip_fname', type='str', default='*',
        help='if the file is a zip archive, file(s) to be opened within the zip. Wildcards * and ? are supported. (default:*)')
    # type 'choice' lets optparse reject an unknown level with a clean usage
    # error, instead of a KeyError traceback when the level is looked up:
    parser.add_option('-l', '--loglevel', dest="loglevel", action="store",
        type='choice', choices=sorted(LOG_LEVELS), default=DEFAULT_LOG_LEVEL,
        help="logging level debug/info/warning/error/critical (default=%default)")

    (options, args) = parser.parse_args()

    # Print help if no arguments are passed
    if len(args) == 0:
        print(__doc__)
        parser.print_help()
        sys.exit()

    # setup logging to the console
    logging.basicConfig(level=LOG_LEVELS[options.loglevel], format='%(levelname)-8s %(message)s')
    # enable logging in the modules: with NOTSET the module loggers defer to
    # the level of the root logger configured just above
    log.setLevel(logging.NOTSET)
    oleobj.log.setLevel(logging.NOTSET)

    for container, filename, data in xglob.iter_files(args, recursive=options.recursive,
        zip_password=options.zip_password, zip_fname=options.zip_fname):
        # ignore directory names stored in zip files:
        if container and filename.endswith('/'):
            continue
        process_file(container, filename, data)
  354 +
  355 +
  356 +
  357 +
... ...