Commit 670d70755885e7e8c5c0e012ee23db95365d6121
1 parent
1665aeea
oleobj: make pylint and pep8 happier
Most changes are just whitespace, line-break, or case changes. But: - this did find an actual error (variable exc was used before it was created) - imports were moved up between the license and the changelog (although I would prefer them in their original place) - removed the _ansi_ from read_*_ansi_string - moved logging constants from main to global scope
Showing
1 changed file
with
160 additions
and
121 deletions
oletools/oleobj.py
| 1 | #!/usr/bin/env python | 1 | #!/usr/bin/env python |
| 2 | -from __future__ import print_function | ||
| 3 | """ | 2 | """ |
| 4 | oleobj.py | 3 | oleobj.py |
| 5 | 4 | ||
| 6 | oleobj is a Python script and module to parse OLE objects and files stored | 5 | oleobj is a Python script and module to parse OLE objects and files stored |
| 7 | -into various file formats such as RTF or MS Office documents (e.g. Word, Excel). | 6 | +into various file formats such as RTF or MS Office documents |
| 7 | +(e.g. Word, Excel). | ||
| 8 | 8 | ||
| 9 | Author: Philippe Lagadec - http://www.decalage.info | 9 | Author: Philippe Lagadec - http://www.decalage.info |
| 10 | License: BSD, see source code or documentation | 10 | License: BSD, see source code or documentation |
| @@ -13,33 +13,63 @@ oleobj is part of the python-oletools package: | @@ -13,33 +13,63 @@ oleobj is part of the python-oletools package: | ||
| 13 | http://www.decalage.info/python/oletools | 13 | http://www.decalage.info/python/oletools |
| 14 | """ | 14 | """ |
| 15 | 15 | ||
| 16 | -# === LICENSE ================================================================== | 16 | +# === LICENSE ================================================================= |
| 17 | 17 | ||
| 18 | # oleobj is copyright (c) 2015-2017 Philippe Lagadec (http://www.decalage.info) | 18 | # oleobj is copyright (c) 2015-2017 Philippe Lagadec (http://www.decalage.info) |
| 19 | # All rights reserved. | 19 | # All rights reserved. |
| 20 | # | 20 | # |
| 21 | -# Redistribution and use in source and binary forms, with or without modification, | ||
| 22 | -# are permitted provided that the following conditions are met: | 21 | +# Redistribution and use in source and binary forms, with or without |
| 22 | +# modification, are permitted provided that the following conditions are met: | ||
| 23 | # | 23 | # |
| 24 | -# * Redistributions of source code must retain the above copyright notice, this | ||
| 25 | -# list of conditions and the following disclaimer. | 24 | +# * Redistributions of source code must retain the above copyright notice, |
| 25 | +# this list of conditions and the following disclaimer. | ||
| 26 | # * Redistributions in binary form must reproduce the above copyright notice, | 26 | # * Redistributions in binary form must reproduce the above copyright notice, |
| 27 | # this list of conditions and the following disclaimer in the documentation | 27 | # this list of conditions and the following disclaimer in the documentation |
| 28 | # and/or other materials provided with the distribution. | 28 | # and/or other materials provided with the distribution. |
| 29 | # | 29 | # |
| 30 | -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND | ||
| 31 | -# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | ||
| 32 | -# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | ||
| 33 | -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE | ||
| 34 | -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | ||
| 35 | -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | ||
| 36 | -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | ||
| 37 | -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | ||
| 38 | -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
| 39 | -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
| 40 | - | ||
| 41 | - | ||
| 42 | -#------------------------------------------------------------------------------ | 30 | +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
| 31 | +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||
| 32 | +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | ||
| 33 | +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE | ||
| 34 | +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | ||
| 35 | +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | ||
| 36 | +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | ||
| 37 | +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | ||
| 38 | +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | ||
| 39 | +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | ||
| 40 | +# POSSIBILITY OF SUCH DAMAGE. | ||
| 41 | + | ||
| 42 | + | ||
| 43 | +# -- IMPORTS ------------------------------------------------------------------ | ||
| 44 | + | ||
| 45 | +from __future__ import print_function | ||
| 46 | + | ||
| 47 | +import logging | ||
| 48 | +import struct | ||
| 49 | +import optparse | ||
| 50 | +import os | ||
| 51 | +import re | ||
| 52 | +import sys | ||
| 53 | +from zipfile import is_zipfile, ZipFile | ||
| 54 | + | ||
| 55 | +# IMPORTANT: it should be possible to run oletools directly as scripts | ||
| 56 | +# in any directory without installing them with pip or setup.py. | ||
| 57 | +# In that case, relative imports are NOT usable. | ||
| 58 | +# And to enable Python 2+3 compatibility, we need to use absolute imports, | ||
| 59 | +# so we add the oletools parent folder to sys.path (absolute+normalized path): | ||
| 60 | +try: | ||
| 61 | + from oletools.thirdparty import olefile | ||
| 62 | +except ImportError: | ||
| 63 | + PARENT_DIR = os.path.normpath(os.path.dirname(os.path.dirname( | ||
| 64 | + os.path.abspath(__file__)))) | ||
| 65 | + if PARENT_DIR not in sys.path: | ||
| 66 | + sys.path.insert(0, PARENT_DIR) | ||
| 67 | + del PARENT_DIR | ||
| 68 | + from oletools.thirdparty import olefile | ||
| 69 | +from oletools.thirdparty import xglob | ||
| 70 | +from ppt_record_parser import is_ppt, PptFile, PptRecordExOleVbaActiveXAtom | ||
| 71 | + | ||
| 72 | +# ----------------------------------------------------------------------------- | ||
| 43 | # CHANGELOG: | 73 | # CHANGELOG: |
| 44 | # 2015-12-05 v0.01 PL: - first version | 74 | # 2015-12-05 v0.01 PL: - first version |
| 45 | # 2016-06 PL: - added main and process_file (not working yet) | 75 | # 2016-06 PL: - added main and process_file (not working yet) |
| @@ -51,12 +81,12 @@ http://www.decalage.info/python/oletools | @@ -51,12 +81,12 @@ http://www.decalage.info/python/oletools | ||
| 51 | 81 | ||
| 52 | __version__ = '0.51' | 82 | __version__ = '0.51' |
| 53 | 83 | ||
| 54 | -#------------------------------------------------------------------------------ | 84 | +# ----------------------------------------------------------------------------- |
| 55 | # TODO: | 85 | # TODO: |
| 56 | # + setup logging (common with other oletools) | 86 | # + setup logging (common with other oletools) |
| 57 | 87 | ||
| 58 | 88 | ||
| 59 | -#------------------------------------------------------------------------------ | 89 | +# ----------------------------------------------------------------------------- |
| 60 | # REFERENCES: | 90 | # REFERENCES: |
| 61 | 91 | ||
| 62 | # Reference for the storage of embedded OLE objects/files: | 92 | # Reference for the storage of embedded OLE objects/files: |
| @@ -67,38 +97,28 @@ __version__ = '0.51' | @@ -67,38 +97,28 @@ __version__ = '0.51' | ||
| 67 | # TODO: oledump | 97 | # TODO: oledump |
| 68 | 98 | ||
| 69 | 99 | ||
| 70 | -#--- IMPORTS ------------------------------------------------------------------ | ||
| 71 | - | ||
| 72 | -import logging, struct, optparse, os, re, sys | 100 | +# === LOGGING ================================================================= |
| 73 | 101 | ||
| 74 | -# IMPORTANT: it should be possible to run oletools directly as scripts | ||
| 75 | -# in any directory without installing them with pip or setup.py. | ||
| 76 | -# In that case, relative imports are NOT usable. | ||
| 77 | -# And to enable Python 2+3 compatibility, we need to use absolute imports, | ||
| 78 | -# so we add the oletools parent folder to sys.path (absolute+normalized path): | ||
| 79 | -_thismodule_dir = os.path.normpath(os.path.abspath(os.path.dirname(__file__))) | ||
| 80 | -# print('_thismodule_dir = %r' % _thismodule_dir) | ||
| 81 | -_parent_dir = os.path.normpath(os.path.join(_thismodule_dir, '..')) | ||
| 82 | -# print('_parent_dir = %r' % _thirdparty_dir) | ||
| 83 | -if not _parent_dir in sys.path: | ||
| 84 | - sys.path.insert(0, _parent_dir) | ||
| 85 | - | ||
| 86 | -from oletools.thirdparty.olefile import olefile | ||
| 87 | -from oletools.thirdparty.xglob import xglob | ||
| 88 | -from ppt_record_parser import is_ppt, PptFile, PptRecordExOleVbaActiveXAtom | 102 | +DEFAULT_LOG_LEVEL = "warning" |
| 103 | +LOG_LEVELS = {'debug': logging.DEBUG, | ||
| 104 | + 'info': logging.INFO, | ||
| 105 | + 'warning': logging.WARNING, | ||
| 106 | + 'error': logging.ERROR, | ||
| 107 | + 'critical': logging.CRITICAL} | ||
| 89 | 108 | ||
| 90 | -# === LOGGING ================================================================= | ||
| 91 | 109 | ||
| 92 | class NullHandler(logging.Handler): | 110 | class NullHandler(logging.Handler): |
| 93 | """ | 111 | """ |
| 94 | Log Handler without output, to avoid printing messages if logging is not | 112 | Log Handler without output, to avoid printing messages if logging is not |
| 95 | configured by the main application. | 113 | configured by the main application. |
| 96 | Python 2.7 has logging.NullHandler, but this is necessary for 2.6: | 114 | Python 2.7 has logging.NullHandler, but this is necessary for 2.6: |
| 97 | - see https://docs.python.org/2.6/library/logging.html#configuring-logging-for-a-library | 115 | + see https://docs.python.org/2.6/library/logging.html section |
| 116 | + configuring-logging-for-a-library | ||
| 98 | """ | 117 | """ |
| 99 | def emit(self, record): | 118 | def emit(self, record): |
| 100 | pass | 119 | pass |
| 101 | 120 | ||
| 121 | + | ||
| 102 | def get_logger(name, level=logging.CRITICAL+1): | 122 | def get_logger(name, level=logging.CRITICAL+1): |
| 103 | """ | 123 | """ |
| 104 | Create a suitable logger object for this module. | 124 | Create a suitable logger object for this module. |
| @@ -111,7 +131,7 @@ def get_logger(name, level=logging.CRITICAL+1): | @@ -111,7 +131,7 @@ def get_logger(name, level=logging.CRITICAL+1): | ||
| 111 | # First, test if there is already a logger with the same name, else it | 131 | # First, test if there is already a logger with the same name, else it |
| 112 | # will generate duplicate messages (due to duplicate handlers): | 132 | # will generate duplicate messages (due to duplicate handlers): |
| 113 | if name in logging.Logger.manager.loggerDict: | 133 | if name in logging.Logger.manager.loggerDict: |
| 114 | - #NOTE: another less intrusive but more "hackish" solution would be to | 134 | + # NOTE: another less intrusive but more "hackish" solution would be to |
| 115 | # use getLogger then test if its effective level is not default. | 135 | # use getLogger then test if its effective level is not default. |
| 116 | logger = logging.getLogger(name) | 136 | logger = logging.getLogger(name) |
| 117 | # make sure level is OK: | 137 | # make sure level is OK: |
| @@ -125,8 +145,10 @@ def get_logger(name, level=logging.CRITICAL+1): | @@ -125,8 +145,10 @@ def get_logger(name, level=logging.CRITICAL+1): | ||
| 125 | logger.setLevel(level) | 145 | logger.setLevel(level) |
| 126 | return logger | 146 | return logger |
| 127 | 147 | ||
| 148 | + | ||
| 128 | # a global logger object used for debugging: | 149 | # a global logger object used for debugging: |
| 129 | -log = get_logger('oleobj') | 150 | +log = get_logger('oleobj') # pylint: disable=invalid-name |
| 151 | + | ||
| 130 | 152 | ||
| 131 | def enable_logging(): | 153 | def enable_logging(): |
| 132 | """ | 154 | """ |
| @@ -137,7 +159,7 @@ def enable_logging(): | @@ -137,7 +159,7 @@ def enable_logging(): | ||
| 137 | log.setLevel(logging.NOTSET) | 159 | log.setLevel(logging.NOTSET) |
| 138 | 160 | ||
| 139 | 161 | ||
| 140 | -# === CONSTANTS ============================================================== | 162 | +# === CONSTANTS =============================================================== |
| 141 | 163 | ||
| 142 | # some str methods on Python 2.x return characters, | 164 | # some str methods on Python 2.x return characters, |
| 143 | # while the equivalent bytes methods return integers on Python 3.x: | 165 | # while the equivalent bytes methods return integers on Python 3.x: |
| @@ -146,18 +168,19 @@ if sys.version_info[0] <= 2: | @@ -146,18 +168,19 @@ if sys.version_info[0] <= 2: | ||
| 146 | NULL_CHAR = '\x00' | 168 | NULL_CHAR = '\x00' |
| 147 | else: | 169 | else: |
| 148 | # Python 3.x | 170 | # Python 3.x |
| 149 | - NULL_CHAR = 0 | 171 | + NULL_CHAR = 0 # pylint: disable=redefined-variable-type |
| 172 | + xrange = range # pylint: disable=redefined-builtin, invalid-name | ||
| 150 | 173 | ||
| 151 | 174 | ||
| 152 | -# === GLOBAL VARIABLES ======================================================= | 175 | +# === GLOBAL VARIABLES ======================================================== |
| 153 | 176 | ||
| 154 | # struct to parse an unsigned integer of 32 bits: | 177 | # struct to parse an unsigned integer of 32 bits: |
| 155 | -struct_uint32 = struct.Struct('<L') | ||
| 156 | -assert struct_uint32.size == 4 # make sure it matches 4 bytes | 178 | +STRUCT_UINT32 = struct.Struct('<L') |
| 179 | +assert STRUCT_UINT32.size == 4 # make sure it matches 4 bytes | ||
| 157 | 180 | ||
| 158 | # struct to parse an unsigned integer of 16 bits: | 181 | # struct to parse an unsigned integer of 16 bits: |
| 159 | -struct_uint16 = struct.Struct('<H') | ||
| 160 | -assert struct_uint16.size == 2 # make sure it matches 2 bytes | 182 | +STRUCT_UINT16 = struct.Struct('<H') |
| 183 | +assert STRUCT_UINT16.size == 2 # make sure it matches 2 bytes | ||
| 161 | 184 | ||
| 162 | # max length of a zero-terminated ansi string. Not sure what this really is | 185 | # max length of a zero-terminated ansi string. Not sure what this really is |
| 163 | STR_MAX_LEN = 1024 | 186 | STR_MAX_LEN = 1024 |
| @@ -173,7 +196,9 @@ RETURN_ERR_ARGS = 2 # reserve for OptionParser.parse_args | @@ -173,7 +196,9 @@ RETURN_ERR_ARGS = 2 # reserve for OptionParser.parse_args | ||
| 173 | RETURN_ERR_STREAM = 4 # error opening/parsing a stream | 196 | RETURN_ERR_STREAM = 4 # error opening/parsing a stream |
| 174 | RETURN_ERR_DUMP = 8 # error dumping data from stream to file | 197 | RETURN_ERR_DUMP = 8 # error dumping data from stream to file |
| 175 | 198 | ||
| 176 | -# === FUNCTIONS ============================================================== | 199 | + |
| 200 | +# === FUNCTIONS =============================================================== | ||
| 201 | + | ||
| 177 | 202 | ||
| 178 | def read_uint32(data, index): | 203 | def read_uint32(data, index): |
| 179 | """ | 204 | """ |
| @@ -185,9 +210,9 @@ def read_uint32(data, index): | @@ -185,9 +210,9 @@ def read_uint32(data, index): | ||
| 185 | and the index to continue reading next time. | 210 | and the index to continue reading next time. |
| 186 | """ | 211 | """ |
| 187 | if index is None: | 212 | if index is None: |
| 188 | - value = struct_uint32.unpack(data.read(4))[0] | 213 | + value = STRUCT_UINT32.unpack(data.read(4))[0] |
| 189 | else: | 214 | else: |
| 190 | - value = struct_uint32.unpack(data[index:index+4])[0] | 215 | + value = STRUCT_UINT32.unpack(data[index:index+4])[0] |
| 191 | index += 4 | 216 | index += 4 |
| 192 | return (value, index) | 217 | return (value, index) |
| 193 | 218 | ||
| @@ -202,19 +227,20 @@ def read_uint16(data, index): | @@ -202,19 +227,20 @@ def read_uint16(data, index): | ||
| 202 | and the index to continue reading next time. | 227 | and the index to continue reading next time. |
| 203 | """ | 228 | """ |
| 204 | if index is None: | 229 | if index is None: |
| 205 | - value = struct_uint16.unpack(data.read(2))[0] | 230 | + value = STRUCT_UINT16.unpack(data.read(2))[0] |
| 206 | else: | 231 | else: |
| 207 | - value = struct_uint16.unpack(data[index:index+2])[0] | 232 | + value = STRUCT_UINT16.unpack(data[index:index+2])[0] |
| 208 | index += 2 | 233 | index += 2 |
| 209 | return (value, index) | 234 | return (value, index) |
| 210 | 235 | ||
| 211 | 236 | ||
| 212 | -def read_LengthPrefixedAnsiString(data, index): | 237 | +def read_length_prefixed_string(data, index): |
| 213 | """ | 238 | """ |
| 214 | Read a length-prefixed ANSI string from data. | 239 | Read a length-prefixed ANSI string from data. |
| 215 | 240 | ||
| 216 | :param data: bytes string or stream containing the data to be extracted. | 241 | :param data: bytes string or stream containing the data to be extracted. |
| 217 | - :param index: index in data where string size start or None if data is stream | 242 | + :param index: index in data where string size start or None if data is |
| 243 | + stream | ||
| 218 | :return: tuple (value, index) containing the read value (bytes string), | 244 | :return: tuple (value, index) containing the read value (bytes string), |
| 219 | and the index to start reading from next time. | 245 | and the index to start reading from next time. |
| 220 | """ | 246 | """ |
| @@ -236,20 +262,21 @@ def read_LengthPrefixedAnsiString(data, index): | @@ -236,20 +262,21 @@ def read_LengthPrefixedAnsiString(data, index): | ||
| 236 | return (ansi_string, index) | 262 | return (ansi_string, index) |
| 237 | 263 | ||
| 238 | 264 | ||
| 239 | -def read_zero_terminated_ansi_string(data, index): | 265 | +def read_zero_terminated_string(data, index): |
| 240 | """ | 266 | """ |
| 241 | Read a zero-terminated ANSI string from data | 267 | Read a zero-terminated ANSI string from data |
| 242 | 268 | ||
| 243 | Guessing that max length is 256 bytes. | 269 | Guessing that max length is 256 bytes. |
| 244 | 270 | ||
| 245 | :param data: bytes string or stream containing an ansi string | 271 | :param data: bytes string or stream containing an ansi string |
| 246 | - :param index: index at which the string should start or None if data is stream | 272 | + :param index: index at which the string should start or None if data is |
| 273 | + stream | ||
| 247 | :return: tuple (string, index) containing the read string (bytes string), | 274 | :return: tuple (string, index) containing the read string (bytes string), |
| 248 | and the index to start reading from next time. | 275 | and the index to start reading from next time. |
| 249 | """ | 276 | """ |
| 250 | if index is None: | 277 | if index is None: |
| 251 | result = [] | 278 | result = [] |
| 252 | - for count in xrange(STR_MAX_LEN): | 279 | + for _ in xrange(STR_MAX_LEN): |
| 253 | char = data.read(1) | 280 | char = data.read(1) |
| 254 | if char == b'\x00': | 281 | if char == b'\x00': |
| 255 | return b''.join(result), index | 282 | return b''.join(result), index |
| @@ -260,9 +287,10 @@ def read_zero_terminated_ansi_string(data, index): | @@ -260,9 +287,10 @@ def read_zero_terminated_ansi_string(data, index): | ||
| 260 | return data[index:end_idx], end_idx+1 # return index after the 0-byte | 287 | return data[index:end_idx], end_idx+1 # return index after the 0-byte |
| 261 | 288 | ||
| 262 | 289 | ||
| 263 | -# === CLASSES ================================================================ | 290 | +# === CLASSES ================================================================= |
| 264 | 291 | ||
| 265 | -class OleNativeStream (object): | 292 | + |
| 293 | +class OleNativeStream(object): | ||
| 266 | """ | 294 | """ |
| 267 | OLE object contained into an OLENativeStream structure. | 295 | OLE object contained into an OLENativeStream structure. |
| 268 | (see MS-OLEDS 2.3.6 OLENativeStream) | 296 | (see MS-OLEDS 2.3.6 OLENativeStream) |
| @@ -272,7 +300,6 @@ class OleNativeStream (object): | @@ -272,7 +300,6 @@ class OleNativeStream (object): | ||
| 272 | TYPE_LINKED = 0x01 | 300 | TYPE_LINKED = 0x01 |
| 273 | TYPE_EMBEDDED = 0x02 | 301 | TYPE_EMBEDDED = 0x02 |
| 274 | 302 | ||
| 275 | - | ||
| 276 | def __init__(self, bindata=None, package=False): | 303 | def __init__(self, bindata=None, package=False): |
| 277 | """ | 304 | """ |
| 278 | Constructor for OleNativeStream. | 305 | Constructor for OleNativeStream. |
| @@ -322,14 +349,14 @@ class OleNativeStream (object): | @@ -322,14 +349,14 @@ class OleNativeStream (object): | ||
| 322 | .format(self.native_data_size)) | 349 | .format(self.native_data_size)) |
| 323 | # I thought this might be an OLE type specifier ??? | 350 | # I thought this might be an OLE type specifier ??? |
| 324 | self.unknown_short, index = read_uint16(data, index) | 351 | self.unknown_short, index = read_uint16(data, index) |
| 325 | - self.filename, index = read_zero_terminated_ansi_string(data, index) | 352 | + self.filename, index = read_zero_terminated_string(data, index) |
| 326 | # source path | 353 | # source path |
| 327 | - self.src_path, index = read_zero_terminated_ansi_string(data, index) | ||
| 328 | - # TODO I bet these next 8 bytes are a timestamp => FILETIME from olefile | 354 | + self.src_path, index = read_zero_terminated_string(data, index) |
| 355 | + # TODO: I bet these 8 bytes are a timestamp ==> FILETIME from olefile | ||
| 329 | self.unknown_long_1, index = read_uint32(data, index) | 356 | self.unknown_long_1, index = read_uint32(data, index) |
| 330 | self.unknown_long_2, index = read_uint32(data, index) | 357 | self.unknown_long_2, index = read_uint32(data, index) |
| 331 | # temp path? | 358 | # temp path? |
| 332 | - self.temp_path, index = read_zero_terminated_ansi_string(data, index) | 359 | + self.temp_path, index = read_zero_terminated_string(data, index) |
| 333 | # size of the rest of the data | 360 | # size of the rest of the data |
| 334 | try: | 361 | try: |
| 335 | self.actual_size, index = read_uint32(data, index) | 362 | self.actual_size, index = read_uint32(data, index) |
| @@ -338,7 +365,7 @@ class OleNativeStream (object): | @@ -338,7 +365,7 @@ class OleNativeStream (object): | ||
| 338 | else: | 365 | else: |
| 339 | self.data = data[index:index+self.actual_size] | 366 | self.data = data[index:index+self.actual_size] |
| 340 | self.is_link = False | 367 | self.is_link = False |
| 341 | - # TODO: exception when size > remaining data | 368 | + # TODO: there can be extra data, no idea what it is for |
| 342 | # TODO: SLACK DATA | 369 | # TODO: SLACK DATA |
| 343 | except (IOError, struct.error): # no data to read actual_size | 370 | except (IOError, struct.error): # no data to read actual_size |
| 344 | logging.debug('data is not embedded but only a link') | 371 | logging.debug('data is not embedded but only a link') |
| @@ -347,7 +374,7 @@ class OleNativeStream (object): | @@ -347,7 +374,7 @@ class OleNativeStream (object): | ||
| 347 | self.data = None | 374 | self.data = None |
| 348 | 375 | ||
| 349 | 376 | ||
| 350 | -class OleObject (object): | 377 | +class OleObject(object): |
| 351 | """ | 378 | """ |
| 352 | OLE 1.0 Object | 379 | OLE 1.0 Object |
| 353 | 380 | ||
| @@ -359,13 +386,15 @@ class OleObject (object): | @@ -359,13 +386,15 @@ class OleObject (object): | ||
| 359 | TYPE_LINKED = 0x01 | 386 | TYPE_LINKED = 0x01 |
| 360 | TYPE_EMBEDDED = 0x02 | 387 | TYPE_EMBEDDED = 0x02 |
| 361 | 388 | ||
| 362 | - | ||
| 363 | def __init__(self, bindata=None): | 389 | def __init__(self, bindata=None): |
| 364 | """ | 390 | """ |
| 365 | Constructor for OleObject. | 391 | Constructor for OleObject. |
| 366 | If bindata is provided, it will be parsed using the parse() method. | 392 | If bindata is provided, it will be parsed using the parse() method. |
| 367 | 393 | ||
| 368 | - :param bindata: bytes, OLE 1.0 Object structure containing an OLE object | 394 | + :param bindata: bytes, OLE 1.0 Object structure containing OLE object |
| 395 | + | ||
| 396 | + Note: Code can easily be generalized to work with byte streams instead | ||
| 397 | + of arrays just like in OleNativeStream. | ||
| 369 | """ | 398 | """ |
| 370 | self.ole_version = None | 399 | self.ole_version = None |
| 371 | self.format_id = None | 400 | self.format_id = None |
| @@ -374,6 +403,8 @@ class OleObject (object): | @@ -374,6 +403,8 @@ class OleObject (object): | ||
| 374 | self.item_name = None | 403 | self.item_name = None |
| 375 | self.data = None | 404 | self.data = None |
| 376 | self.data_size = None | 405 | self.data_size = None |
| 406 | + if bindata is not None: | ||
| 407 | + self.parse(bindata) | ||
| 377 | 408 | ||
| 378 | def parse(self, data): | 409 | def parse(self, data): |
| 379 | """ | 410 | """ |
| @@ -388,27 +419,29 @@ class OleObject (object): | @@ -388,27 +419,29 @@ class OleObject (object): | ||
| 388 | # print("Parsing OLE object data:") | 419 | # print("Parsing OLE object data:") |
| 389 | # print(hexdump3(data, length=16)) | 420 | # print(hexdump3(data, length=16)) |
| 390 | # Header: see MS-OLEDS 2.2.4 ObjectHeader | 421 | # Header: see MS-OLEDS 2.2.4 ObjectHeader |
| 422 | + index = 0 | ||
| 391 | self.ole_version, index = read_uint32(data, index) | 423 | self.ole_version, index = read_uint32(data, index) |
| 392 | self.format_id, index = read_uint32(data, index) | 424 | self.format_id, index = read_uint32(data, index) |
| 393 | - log.debug('OLE version=%08X - Format ID=%08X' % (self.ole_version, self.format_id)) | 425 | + log.debug('OLE version=%08X - Format ID=%08X' |
| 426 | + % (self.ole_version, self.format_id)) | ||
| 394 | assert self.format_id in (self.TYPE_EMBEDDED, self.TYPE_LINKED) | 427 | assert self.format_id in (self.TYPE_EMBEDDED, self.TYPE_LINKED) |
| 395 | - self.class_name, index = read_LengthPrefixedAnsiString(data, index) | ||
| 396 | - self.topic_name, index = read_LengthPrefixedAnsiString(data, index) | ||
| 397 | - self.item_name, index = read_LengthPrefixedAnsiString(data, index) | 428 | + self.class_name, index = read_length_prefixed_string(data, index) |
| 429 | + self.topic_name, index = read_length_prefixed_string(data, index) | ||
| 430 | + self.item_name, index = read_length_prefixed_string(data, index) | ||
| 398 | log.debug('Class name=%r - Topic name=%r - Item name=%r' | 431 | log.debug('Class name=%r - Topic name=%r - Item name=%r' |
| 399 | - % (self.class_name, self.topic_name, self.item_name)) | 432 | + % (self.class_name, self.topic_name, self.item_name)) |
| 400 | if self.format_id == self.TYPE_EMBEDDED: | 433 | if self.format_id == self.TYPE_EMBEDDED: |
| 401 | # Embedded object: see MS-OLEDS 2.2.5 EmbeddedObject | 434 | # Embedded object: see MS-OLEDS 2.2.5 EmbeddedObject |
| 402 | - #assert self.topic_name != '' and self.item_name != '' | 435 | + # assert self.topic_name != '' and self.item_name != '' |
| 403 | self.data_size, index = read_uint32(data, index) | 436 | self.data_size, index = read_uint32(data, index) |
| 404 | - log.debug('Declared data size=%d - remaining size=%d' % (self.data_size, len(data)-index)) | 437 | + log.debug('Declared data size=%d - remaining size=%d' |
| 438 | + % (self.data_size, len(data)-index)) | ||
| 405 | # TODO: handle incorrect size to avoid exception | 439 | # TODO: handle incorrect size to avoid exception |
| 406 | self.data = data[index:index+self.data_size] | 440 | self.data = data[index:index+self.data_size] |
| 407 | assert len(self.data) == self.data_size | 441 | assert len(self.data) == self.data_size |
| 408 | self.extra_data = data[index+self.data_size:] | 442 | self.extra_data = data[index+self.data_size:] |
| 409 | 443 | ||
| 410 | 444 | ||
| 411 | - | ||
| 412 | def sanitize_filename(filename, replacement='_', max_length=200): | 445 | def sanitize_filename(filename, replacement='_', max_length=200): |
| 413 | """compute basename of filename. Replaces all non-whitelisted characters. | 446 | """compute basename of filename. Replaces all non-whitelisted characters. |
| 414 | The returned filename is always a basename of the file.""" | 447 | The returned filename is always a basename of the file.""" |
| @@ -421,7 +454,7 @@ def sanitize_filename(filename, replacement='_', max_length=200): | @@ -421,7 +454,7 @@ def sanitize_filename(filename, replacement='_', max_length=200): | ||
| 421 | while " " in sane_fname: | 454 | while " " in sane_fname: |
| 422 | sane_fname = sane_fname.replace(' ', ' ') | 455 | sane_fname = sane_fname.replace(' ', ' ') |
| 423 | 456 | ||
| 424 | - if not len(filename): | 457 | + if not filename: |
| 425 | sane_fname = 'NONAME' | 458 | sane_fname = 'NONAME' |
| 426 | 459 | ||
| 427 | # limit filename length | 460 | # limit filename length |
| @@ -507,7 +540,7 @@ def find_ole(filename, data): | @@ -507,7 +540,7 @@ def find_ole(filename, data): | ||
| 507 | yield None # --> leads to non-0 return code but try next file first | 540 | yield None # --> leads to non-0 return code but try next file first |
| 508 | 541 | ||
| 509 | 542 | ||
| 510 | -def process_file(container, filename, data, output_dir=None): | 543 | +def process_file(filename, data, output_dir=None): |
| 511 | """ find embedded objects in given file | 544 | """ find embedded objects in given file |
| 512 | 545 | ||
| 513 | if data is given (from xglob for encrypted zip files), then filename is | 546 | if data is given (from xglob for encrypted zip files), then filename is |
| @@ -530,8 +563,8 @@ def process_file(container, filename, data, output_dir=None): | @@ -530,8 +563,8 @@ def process_file(container, filename, data, output_dir=None): | ||
| 530 | fname_prefix = os.path.join(base_dir, sane_fname) | 563 | fname_prefix = os.path.join(base_dir, sane_fname) |
| 531 | 564 | ||
| 532 | # TODO: option to extract objects to files (false by default) | 565 | # TODO: option to extract objects to files (false by default) |
| 533 | - print ('-'*79) | ||
| 534 | - print ('File: %r' % filename) | 566 | + print('-'*79) |
| 567 | + print('File: %r' % filename) | ||
| 535 | index = 1 | 568 | index = 1 |
| 536 | 569 | ||
| 537 | # do not throw errors but remember them and try continue with other streams | 570 | # do not throw errors but remember them and try continue with other streams |
| @@ -553,10 +586,10 @@ def process_file(container, filename, data, output_dir=None): | @@ -553,10 +586,10 @@ def process_file(container, filename, data, output_dir=None): | ||
| 553 | stream = ole.openstream(path_parts) | 586 | stream = ole.openstream(path_parts) |
| 554 | print('extract file embedded in OLE object from stream %r:' | 587 | print('extract file embedded in OLE object from stream %r:' |
| 555 | % stream_path) | 588 | % stream_path) |
| 556 | - print ('Parsing OLE Package') | 589 | + print('Parsing OLE Package') |
| 557 | opkg = OleNativeStream(stream) | 590 | opkg = OleNativeStream(stream) |
| 558 | # leave stream open until dumping is finished | 591 | # leave stream open until dumping is finished |
| 559 | - except Exception: | 592 | + except Exception as exc: |
| 560 | log.warning('*** Not an OLE 1.0 Object ({0})'.format(exc)) | 593 | log.warning('*** Not an OLE 1.0 Object ({0})'.format(exc)) |
| 561 | err_stream = True | 594 | err_stream = True |
| 562 | if stream is not None: | 595 | if stream is not None: |
| @@ -568,9 +601,9 @@ def process_file(container, filename, data, output_dir=None): | @@ -568,9 +601,9 @@ def process_file(container, filename, data, output_dir=None): | ||
| 568 | log.debug('Object is not embedded but only linked to ' | 601 | log.debug('Object is not embedded but only linked to ' |
| 569 | '- skip') | 602 | '- skip') |
| 570 | continue | 603 | continue |
| 571 | - print ('Filename = %r' % opkg.filename) | ||
| 572 | - print ('Source path = %r' % opkg.src_path) | ||
| 573 | - print ('Temp path = %r' % opkg.temp_path) | 604 | + print('Filename = %r' % opkg.filename) |
| 605 | + print('Source path = %r' % opkg.src_path) | ||
| 606 | + print('Temp path = %r' % opkg.temp_path) | ||
| 574 | if opkg.filename: | 607 | if opkg.filename: |
| 575 | fname = '%s_%s' % (fname_prefix, | 608 | fname = '%s_%s' % (fname_prefix, |
| 576 | sanitize_filename(opkg.filename)) | 609 | sanitize_filename(opkg.filename)) |
| @@ -579,10 +612,10 @@ def process_file(container, filename, data, output_dir=None): | @@ -579,10 +612,10 @@ def process_file(container, filename, data, output_dir=None): | ||
| 579 | 612 | ||
| 580 | # dump | 613 | # dump |
| 581 | try: | 614 | try: |
| 582 | - print ('saving to file %s' % fname) | 615 | + print('saving to file %s' % fname) |
| 583 | with open(fname, 'wb') as writer: | 616 | with open(fname, 'wb') as writer: |
| 584 | n_dumped = 0 | 617 | n_dumped = 0 |
| 585 | - next_size = min(DUMP_CHUNK_SIZE, opkg.actual_size) | 618 | + next_size = min(DUMP_CHUNK_SIZE, opkg.actual_size) |
| 586 | while next_size: | 619 | while next_size: |
| 587 | data = stream.read(next_size) | 620 | data = stream.read(next_size) |
| 588 | writer.write(data) | 621 | writer.write(data) |
| @@ -591,8 +624,8 @@ def process_file(container, filename, data, output_dir=None): | @@ -591,8 +624,8 @@ def process_file(container, filename, data, output_dir=None): | ||
| 591 | logging.warning('Wanted to read {0}, got {1}' | 624 | logging.warning('Wanted to read {0}, got {1}' |
| 592 | .format(next_size, len(data))) | 625 | .format(next_size, len(data))) |
| 593 | break | 626 | break |
| 594 | - next_size = min(DUMP_CHUNK_SIZE, | ||
| 595 | - opkg.actual_size - n_dumped) | 627 | + next_size = min(DUMP_CHUNK_SIZE, |
| 628 | + opkg.actual_size - n_dumped) | ||
| 596 | did_dump = True | 629 | did_dump = True |
| 597 | except Exception as exc: | 630 | except Exception as exc: |
| 598 | log.warning('error dumping to {0} ({1})' | 631 | log.warning('error dumping to {0} ({1})' |
| @@ -605,23 +638,17 @@ def process_file(container, filename, data, output_dir=None): | @@ -605,23 +638,17 @@ def process_file(container, filename, data, output_dir=None): | ||
| 605 | return err_stream, err_dumping, did_dump | 638 | return err_stream, err_dumping, did_dump |
| 606 | 639 | ||
| 607 | 640 | ||
| 608 | -#=== MAIN ================================================================= | 641 | +# === MAIN ==================================================================== |
| 642 | + | ||
| 609 | 643 | ||
| 610 | def main(): | 644 | def main(): |
| 611 | """ main function, called when running this as script """ | 645 | """ main function, called when running this as script """ |
| 612 | # print banner with version | 646 | # print banner with version |
| 613 | - print ('oleobj %s - http://decalage.info/oletools' % __version__) | ||
| 614 | - print ('THIS IS WORK IN PROGRESS - Check updates regularly!') | ||
| 615 | - print ('Please report any issue at https://github.com/decalage2/oletools/issues') | ||
| 616 | - print ('') | ||
| 617 | - | ||
| 618 | - DEFAULT_LOG_LEVEL = "warning" # Default log level | ||
| 619 | - LOG_LEVELS = {'debug': logging.DEBUG, | ||
| 620 | - 'info': logging.INFO, | ||
| 621 | - 'warning': logging.WARNING, | ||
| 622 | - 'error': logging.ERROR, | ||
| 623 | - 'critical': logging.CRITICAL | ||
| 624 | - } | 647 | + print('oleobj %s - http://decalage.info/oletools' % __version__) |
| 648 | + print('THIS IS WORK IN PROGRESS - Check updates regularly!') | ||
| 649 | + print('Please report any issue at ' | ||
| 650 | + 'https://github.com/decalage2/oletools/issues') | ||
| 651 | + print('') | ||
| 625 | 652 | ||
| 626 | usage = 'usage: %prog [options] <filename> [filename2 ...]' | 653 | usage = 'usage: %prog [options] <filename> [filename2 ...]' |
| 627 | parser = optparse.OptionParser(usage=usage) | 654 | parser = optparse.OptionParser(usage=usage) |
| @@ -630,21 +657,31 @@ def main(): | @@ -630,21 +657,31 @@ def main(): | ||
| 630 | # parser.add_option('-c', '--csv', dest='csv', | 657 | # parser.add_option('-c', '--csv', dest='csv', |
| 631 | # help='export results to a CSV file') | 658 | # help='export results to a CSV file') |
| 632 | parser.add_option("-r", action="store_true", dest="recursive", | 659 | parser.add_option("-r", action="store_true", dest="recursive", |
| 633 | - help='find files recursively in subdirectories.') | ||
| 634 | - parser.add_option("-d", type="str", dest="output_dir", | ||
| 635 | - help='use specified directory to output files.', default=None) | ||
| 636 | - parser.add_option("-z", "--zip", dest='zip_password', type='str', default=None, | ||
| 637 | - help='if the file is a zip archive, open first file from it, using the provided password (requires Python 2.6+)') | ||
| 638 | - parser.add_option("-f", "--zipfname", dest='zip_fname', type='str', default='*', | ||
| 639 | - help='if the file is a zip archive, file(s) to be opened within the zip. Wildcards * and ? are supported. (default:*)') | ||
| 640 | - parser.add_option('-l', '--loglevel', dest="loglevel", action="store", default=DEFAULT_LOG_LEVEL, | ||
| 641 | - help="logging level debug/info/warning/error/critical (default=%default)") | 660 | + help='find files recursively in subdirectories.') |
| 661 | + parser.add_option("-d", type="str", dest="output_dir", default=None, | ||
| 662 | + help='use specified directory to output files.') | ||
| 663 | + parser.add_option("-z", "--zip", dest='zip_password', type='str', | ||
| 664 | + default=None, | ||
| 665 | + help='if the file is a zip archive, open first file from' | ||
| 666 | + 'it, using the provided password (requires Python ' | ||
| 667 | + '2.6+)') | ||
| 668 | + parser.add_option("-f", "--zipfname", dest='zip_fname', type='str', | ||
| 669 | + default='*', | ||
| 670 | + help='if the file is a zip archive, file(s) to be opened' | ||
| 671 | + 'within the zip. Wildcards * and ? are supported. ' | ||
| 672 | + '(default:*)') | ||
| 673 | + parser.add_option('-l', '--loglevel', dest="loglevel", action="store", | ||
| 674 | + default=DEFAULT_LOG_LEVEL, | ||
| 675 | + help='logging level debug/info/warning/error/critical ' | ||
| 676 | + '(default=%default)') | ||
| 642 | 677 | ||
| 643 | # options for compatibility with ripOLE | 678 | # options for compatibility with ripOLE |
| 644 | parser.add_option('-i', '--more-input', type='str', default=None, | 679 | parser.add_option('-i', '--more-input', type='str', default=None, |
| 645 | - help='Additional file to parse (same as positional arguments)') | 680 | + help='Additional file to parse (same as positional ' |
| 681 | + 'arguments)') | ||
| 646 | parser.add_option('-v', '--verbose', action='store_true', | 682 | parser.add_option('-v', '--verbose', action='store_true', |
| 647 | - help='verbose mode, set logging to DEBUG (overwrites -l)') | 683 | + help='verbose mode, set logging to DEBUG ' |
| 684 | + '(overwrites -l)') | ||
| 648 | 685 | ||
| 649 | (options, args) = parser.parse_args() | 686 | (options, args) = parser.parse_args() |
| 650 | if options.more_input: | 687 | if options.more_input: |
| @@ -653,7 +690,7 @@ def main(): | @@ -653,7 +690,7 @@ def main(): | ||
| 653 | options.loglevel = 'debug' | 690 | options.loglevel = 'debug' |
| 654 | 691 | ||
| 655 | # Print help if no arguments are passed | 692 | # Print help if no arguments are passed |
| 656 | - if len(args) == 0: | 693 | + if not args: |
| 657 | parser.print_help() | 694 | parser.print_help() |
| 658 | return RETURN_ERR_ARGS | 695 | return RETURN_ERR_ARGS |
| 659 | for filename in args: | 696 | for filename in args: |
| @@ -674,13 +711,15 @@ def main(): | @@ -674,13 +711,15 @@ def main(): | ||
| 674 | any_err_dumping = False | 711 | any_err_dumping = False |
| 675 | any_did_dump = False | 712 | any_did_dump = False |
| 676 | 713 | ||
| 677 | - for container, filename, data in xglob.iter_files(args, recursive=options.recursive, | ||
| 678 | - zip_password=options.zip_password, zip_fname=options.zip_fname): | 714 | + for container, filename, data in \ |
| 715 | + xglob.iter_files(args, recursive=options.recursive, | ||
| 716 | + zip_password=options.zip_password, | ||
| 717 | + zip_fname=options.zip_fname): | ||
| 679 | # ignore directory names stored in zip files: | 718 | # ignore directory names stored in zip files: |
| 680 | if container and filename.endswith('/'): | 719 | if container and filename.endswith('/'): |
| 681 | continue | 720 | continue |
| 682 | err_stream, err_dumping, did_dump = \ | 721 | err_stream, err_dumping, did_dump = \ |
| 683 | - process_file(container, filename, data, options.output_dir) | 722 | + process_file(filename, data, options.output_dir) |
| 684 | any_err_stream |= err_stream | 723 | any_err_stream |= err_stream |
| 685 | any_err_dumping |= err_dumping | 724 | any_err_dumping |= err_dumping |
| 686 | any_did_dump |= did_dump | 725 | any_did_dump |= did_dump |