Commit 670d70755885e7e8c5c0e012ee23db95365d6121
1 parent
1665aeea
oleobj: make pylint and pep8 happier
Most changes are just whitespace, line-break, or case changes. But: - this did find an actual error (variable exc was used before creation) - moved imports up between license and changelog (although I would prefer them in their original place) - removed the _ansi_ from read_*_ansi_string - moved logging constants from main to global scope
Showing
1 changed file
with
160 additions
and
121 deletions
oletools/oleobj.py
| 1 | 1 | #!/usr/bin/env python |
| 2 | -from __future__ import print_function | |
| 3 | 2 | """ |
| 4 | 3 | oleobj.py |
| 5 | 4 | |
| 6 | 5 | oleobj is a Python script and module to parse OLE objects and files stored |
| 7 | -into various file formats such as RTF or MS Office documents (e.g. Word, Excel). | |
| 6 | +into various file formats such as RTF or MS Office documents | |
| 7 | +(e.g. Word, Excel). | |
| 8 | 8 | |
| 9 | 9 | Author: Philippe Lagadec - http://www.decalage.info |
| 10 | 10 | License: BSD, see source code or documentation |
| ... | ... | @@ -13,33 +13,63 @@ oleobj is part of the python-oletools package: |
| 13 | 13 | http://www.decalage.info/python/oletools |
| 14 | 14 | """ |
| 15 | 15 | |
| 16 | -# === LICENSE ================================================================== | |
| 16 | +# === LICENSE ================================================================= | |
| 17 | 17 | |
| 18 | 18 | # oleobj is copyright (c) 2015-2017 Philippe Lagadec (http://www.decalage.info) |
| 19 | 19 | # All rights reserved. |
| 20 | 20 | # |
| 21 | -# Redistribution and use in source and binary forms, with or without modification, | |
| 22 | -# are permitted provided that the following conditions are met: | |
| 21 | +# Redistribution and use in source and binary forms, with or without | |
| 22 | +# modification, are permitted provided that the following conditions are met: | |
| 23 | 23 | # |
| 24 | -# * Redistributions of source code must retain the above copyright notice, this | |
| 25 | -# list of conditions and the following disclaimer. | |
| 24 | +# * Redistributions of source code must retain the above copyright notice, | |
| 25 | +# this list of conditions and the following disclaimer. | |
| 26 | 26 | # * Redistributions in binary form must reproduce the above copyright notice, |
| 27 | 27 | # this list of conditions and the following disclaimer in the documentation |
| 28 | 28 | # and/or other materials provided with the distribution. |
| 29 | 29 | # |
| 30 | -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND | |
| 31 | -# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | |
| 32 | -# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | |
| 33 | -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE | |
| 34 | -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
| 35 | -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |
| 36 | -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |
| 37 | -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |
| 38 | -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
| 39 | -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
| 40 | - | |
| 41 | - | |
| 42 | -#------------------------------------------------------------------------------ | |
| 30 | +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |
| 31 | +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
| 32 | +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
| 33 | +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE | |
| 34 | +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | |
| 35 | +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | |
| 36 | +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | |
| 37 | +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | |
| 38 | +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |
| 39 | +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | |
| 40 | +# POSSIBILITY OF SUCH DAMAGE. | |
| 41 | + | |
| 42 | + | |
| 43 | +# -- IMPORTS ------------------------------------------------------------------ | |
| 44 | + | |
| 45 | +from __future__ import print_function | |
| 46 | + | |
| 47 | +import logging | |
| 48 | +import struct | |
| 49 | +import optparse | |
| 50 | +import os | |
| 51 | +import re | |
| 52 | +import sys | |
| 53 | +from zipfile import is_zipfile, ZipFile | |
| 54 | + | |
| 55 | +# IMPORTANT: it should be possible to run oletools directly as scripts | |
| 56 | +# in any directory without installing them with pip or setup.py. | |
| 57 | +# In that case, relative imports are NOT usable. | |
| 58 | +# And to enable Python 2+3 compatibility, we need to use absolute imports, | |
| 59 | +# so we add the oletools parent folder to sys.path (absolute+normalized path): | |
| 60 | +try: | |
| 61 | + from oletools.thirdparty import olefile | |
| 62 | +except ImportError: | |
| 63 | + PARENT_DIR = os.path.normpath(os.path.dirname(os.path.dirname( | |
| 64 | + os.path.abspath(__file__)))) | |
| 65 | + if PARENT_DIR not in sys.path: | |
| 66 | + sys.path.insert(0, PARENT_DIR) | |
| 67 | + del PARENT_DIR | |
| 68 | + from oletools.thirdparty import olefile | |
| 69 | +from oletools.thirdparty import xglob | |
| 70 | +from ppt_record_parser import is_ppt, PptFile, PptRecordExOleVbaActiveXAtom | |
| 71 | + | |
| 72 | +# ----------------------------------------------------------------------------- | |
| 43 | 73 | # CHANGELOG: |
| 44 | 74 | # 2015-12-05 v0.01 PL: - first version |
| 45 | 75 | # 2016-06 PL: - added main and process_file (not working yet) |
| ... | ... | @@ -51,12 +81,12 @@ http://www.decalage.info/python/oletools |
| 51 | 81 | |
| 52 | 82 | __version__ = '0.51' |
| 53 | 83 | |
| 54 | -#------------------------------------------------------------------------------ | |
| 84 | +# ----------------------------------------------------------------------------- | |
| 55 | 85 | # TODO: |
| 56 | 86 | # + setup logging (common with other oletools) |
| 57 | 87 | |
| 58 | 88 | |
| 59 | -#------------------------------------------------------------------------------ | |
| 89 | +# ----------------------------------------------------------------------------- | |
| 60 | 90 | # REFERENCES: |
| 61 | 91 | |
| 62 | 92 | # Reference for the storage of embedded OLE objects/files: |
| ... | ... | @@ -67,38 +97,28 @@ __version__ = '0.51' |
| 67 | 97 | # TODO: oledump |
| 68 | 98 | |
| 69 | 99 | |
| 70 | -#--- IMPORTS ------------------------------------------------------------------ | |
| 71 | - | |
| 72 | -import logging, struct, optparse, os, re, sys | |
| 100 | +# === LOGGING ================================================================= | |
| 73 | 101 | |
| 74 | -# IMPORTANT: it should be possible to run oletools directly as scripts | |
| 75 | -# in any directory without installing them with pip or setup.py. | |
| 76 | -# In that case, relative imports are NOT usable. | |
| 77 | -# And to enable Python 2+3 compatibility, we need to use absolute imports, | |
| 78 | -# so we add the oletools parent folder to sys.path (absolute+normalized path): | |
| 79 | -_thismodule_dir = os.path.normpath(os.path.abspath(os.path.dirname(__file__))) | |
| 80 | -# print('_thismodule_dir = %r' % _thismodule_dir) | |
| 81 | -_parent_dir = os.path.normpath(os.path.join(_thismodule_dir, '..')) | |
| 82 | -# print('_parent_dir = %r' % _thirdparty_dir) | |
| 83 | -if not _parent_dir in sys.path: | |
| 84 | - sys.path.insert(0, _parent_dir) | |
| 85 | - | |
| 86 | -from oletools.thirdparty.olefile import olefile | |
| 87 | -from oletools.thirdparty.xglob import xglob | |
| 88 | -from ppt_record_parser import is_ppt, PptFile, PptRecordExOleVbaActiveXAtom | |
| 102 | +DEFAULT_LOG_LEVEL = "warning" | |
| 103 | +LOG_LEVELS = {'debug': logging.DEBUG, | |
| 104 | + 'info': logging.INFO, | |
| 105 | + 'warning': logging.WARNING, | |
| 106 | + 'error': logging.ERROR, | |
| 107 | + 'critical': logging.CRITICAL} | |
| 89 | 108 | |
| 90 | -# === LOGGING ================================================================= | |
| 91 | 109 | |
| 92 | 110 | class NullHandler(logging.Handler): |
| 93 | 111 | """ |
| 94 | 112 | Log Handler without output, to avoid printing messages if logging is not |
| 95 | 113 | configured by the main application. |
| 96 | 114 | Python 2.7 has logging.NullHandler, but this is necessary for 2.6: |
| 97 | - see https://docs.python.org/2.6/library/logging.html#configuring-logging-for-a-library | |
| 115 | + see https://docs.python.org/2.6/library/logging.html section | |
| 116 | + configuring-logging-for-a-library | |
| 98 | 117 | """ |
| 99 | 118 | def emit(self, record): |
| 100 | 119 | pass |
| 101 | 120 | |
| 121 | + | |
| 102 | 122 | def get_logger(name, level=logging.CRITICAL+1): |
| 103 | 123 | """ |
| 104 | 124 | Create a suitable logger object for this module. |
| ... | ... | @@ -111,7 +131,7 @@ def get_logger(name, level=logging.CRITICAL+1): |
| 111 | 131 | # First, test if there is already a logger with the same name, else it |
| 112 | 132 | # will generate duplicate messages (due to duplicate handlers): |
| 113 | 133 | if name in logging.Logger.manager.loggerDict: |
| 114 | - #NOTE: another less intrusive but more "hackish" solution would be to | |
| 134 | + # NOTE: another less intrusive but more "hackish" solution would be to | |
| 115 | 135 | # use getLogger then test if its effective level is not default. |
| 116 | 136 | logger = logging.getLogger(name) |
| 117 | 137 | # make sure level is OK: |
| ... | ... | @@ -125,8 +145,10 @@ def get_logger(name, level=logging.CRITICAL+1): |
| 125 | 145 | logger.setLevel(level) |
| 126 | 146 | return logger |
| 127 | 147 | |
| 148 | + | |
| 128 | 149 | # a global logger object used for debugging: |
| 129 | -log = get_logger('oleobj') | |
| 150 | +log = get_logger('oleobj') # pylint: disable=invalid-name | |
| 151 | + | |
| 130 | 152 | |
| 131 | 153 | def enable_logging(): |
| 132 | 154 | """ |
| ... | ... | @@ -137,7 +159,7 @@ def enable_logging(): |
| 137 | 159 | log.setLevel(logging.NOTSET) |
| 138 | 160 | |
| 139 | 161 | |
| 140 | -# === CONSTANTS ============================================================== | |
| 162 | +# === CONSTANTS =============================================================== | |
| 141 | 163 | |
| 142 | 164 | # some str methods on Python 2.x return characters, |
| 143 | 165 | # while the equivalent bytes methods return integers on Python 3.x: |
| ... | ... | @@ -146,18 +168,19 @@ if sys.version_info[0] <= 2: |
| 146 | 168 | NULL_CHAR = '\x00' |
| 147 | 169 | else: |
| 148 | 170 | # Python 3.x |
| 149 | - NULL_CHAR = 0 | |
| 171 | + NULL_CHAR = 0 # pylint: disable=redefined-variable-type | |
| 172 | + xrange = range # pylint: disable=redefined-builtin, invalid-name | |
| 150 | 173 | |
| 151 | 174 | |
| 152 | -# === GLOBAL VARIABLES ======================================================= | |
| 175 | +# === GLOBAL VARIABLES ======================================================== | |
| 153 | 176 | |
| 154 | 177 | # struct to parse an unsigned integer of 32 bits: |
| 155 | -struct_uint32 = struct.Struct('<L') | |
| 156 | -assert struct_uint32.size == 4 # make sure it matches 4 bytes | |
| 178 | +STRUCT_UINT32 = struct.Struct('<L') | |
| 179 | +assert STRUCT_UINT32.size == 4 # make sure it matches 4 bytes | |
| 157 | 180 | |
| 158 | 181 | # struct to parse an unsigned integer of 16 bits: |
| 159 | -struct_uint16 = struct.Struct('<H') | |
| 160 | -assert struct_uint16.size == 2 # make sure it matches 2 bytes | |
| 182 | +STRUCT_UINT16 = struct.Struct('<H') | |
| 183 | +assert STRUCT_UINT16.size == 2 # make sure it matches 2 bytes | |
| 161 | 184 | |
| 162 | 185 | # max length of a zero-terminated ansi string. Not sure what this really is |
| 163 | 186 | STR_MAX_LEN = 1024 |
| ... | ... | @@ -173,7 +196,9 @@ RETURN_ERR_ARGS = 2 # reserve for OptionParser.parse_args |
| 173 | 196 | RETURN_ERR_STREAM = 4 # error opening/parsing a stream |
| 174 | 197 | RETURN_ERR_DUMP = 8 # error dumping data from stream to file |
| 175 | 198 | |
| 176 | -# === FUNCTIONS ============================================================== | |
| 199 | + | |
| 200 | +# === FUNCTIONS =============================================================== | |
| 201 | + | |
| 177 | 202 | |
| 178 | 203 | def read_uint32(data, index): |
| 179 | 204 | """ |
| ... | ... | @@ -185,9 +210,9 @@ def read_uint32(data, index): |
| 185 | 210 | and the index to continue reading next time. |
| 186 | 211 | """ |
| 187 | 212 | if index is None: |
| 188 | - value = struct_uint32.unpack(data.read(4))[0] | |
| 213 | + value = STRUCT_UINT32.unpack(data.read(4))[0] | |
| 189 | 214 | else: |
| 190 | - value = struct_uint32.unpack(data[index:index+4])[0] | |
| 215 | + value = STRUCT_UINT32.unpack(data[index:index+4])[0] | |
| 191 | 216 | index += 4 |
| 192 | 217 | return (value, index) |
| 193 | 218 | |
| ... | ... | @@ -202,19 +227,20 @@ def read_uint16(data, index): |
| 202 | 227 | and the index to continue reading next time. |
| 203 | 228 | """ |
| 204 | 229 | if index is None: |
| 205 | - value = struct_uint16.unpack(data.read(2))[0] | |
| 230 | + value = STRUCT_UINT16.unpack(data.read(2))[0] | |
| 206 | 231 | else: |
| 207 | - value = struct_uint16.unpack(data[index:index+2])[0] | |
| 232 | + value = STRUCT_UINT16.unpack(data[index:index+2])[0] | |
| 208 | 233 | index += 2 |
| 209 | 234 | return (value, index) |
| 210 | 235 | |
| 211 | 236 | |
| 212 | -def read_LengthPrefixedAnsiString(data, index): | |
| 237 | +def read_length_prefixed_string(data, index): | |
| 213 | 238 | """ |
| 214 | 239 | Read a length-prefixed ANSI string from data. |
| 215 | 240 | |
| 216 | 241 | :param data: bytes string or stream containing the data to be extracted. |
| 217 | - :param index: index in data where string size start or None if data is stream | |
| 242 | + :param index: index in data where string size start or None if data is | |
| 243 | + stream | |
| 218 | 244 | :return: tuple (value, index) containing the read value (bytes string), |
| 219 | 245 | and the index to start reading from next time. |
| 220 | 246 | """ |
| ... | ... | @@ -236,20 +262,21 @@ def read_LengthPrefixedAnsiString(data, index): |
| 236 | 262 | return (ansi_string, index) |
| 237 | 263 | |
| 238 | 264 | |
| 239 | -def read_zero_terminated_ansi_string(data, index): | |
| 265 | +def read_zero_terminated_string(data, index): | |
| 240 | 266 | """ |
| 241 | 267 | Read a zero-terminated ANSI string from data |
| 242 | 268 | |
| 243 | 269 | Guessing that max length is 256 bytes. |
| 244 | 270 | |
| 245 | 271 | :param data: bytes string or stream containing an ansi string |
| 246 | - :param index: index at which the string should start or None if data is stream | |
| 272 | + :param index: index at which the string should start or None if data is | |
| 273 | + stream | |
| 247 | 274 | :return: tuple (string, index) containing the read string (bytes string), |
| 248 | 275 | and the index to start reading from next time. |
| 249 | 276 | """ |
| 250 | 277 | if index is None: |
| 251 | 278 | result = [] |
| 252 | - for count in xrange(STR_MAX_LEN): | |
| 279 | + for _ in xrange(STR_MAX_LEN): | |
| 253 | 280 | char = data.read(1) |
| 254 | 281 | if char == b'\x00': |
| 255 | 282 | return b''.join(result), index |
| ... | ... | @@ -260,9 +287,10 @@ def read_zero_terminated_ansi_string(data, index): |
| 260 | 287 | return data[index:end_idx], end_idx+1 # return index after the 0-byte |
| 261 | 288 | |
| 262 | 289 | |
| 263 | -# === CLASSES ================================================================ | |
| 290 | +# === CLASSES ================================================================= | |
| 264 | 291 | |
| 265 | -class OleNativeStream (object): | |
| 292 | + | |
| 293 | +class OleNativeStream(object): | |
| 266 | 294 | """ |
| 267 | 295 | OLE object contained into an OLENativeStream structure. |
| 268 | 296 | (see MS-OLEDS 2.3.6 OLENativeStream) |
| ... | ... | @@ -272,7 +300,6 @@ class OleNativeStream (object): |
| 272 | 300 | TYPE_LINKED = 0x01 |
| 273 | 301 | TYPE_EMBEDDED = 0x02 |
| 274 | 302 | |
| 275 | - | |
| 276 | 303 | def __init__(self, bindata=None, package=False): |
| 277 | 304 | """ |
| 278 | 305 | Constructor for OleNativeStream. |
| ... | ... | @@ -322,14 +349,14 @@ class OleNativeStream (object): |
| 322 | 349 | .format(self.native_data_size)) |
| 323 | 350 | # I thought this might be an OLE type specifier ??? |
| 324 | 351 | self.unknown_short, index = read_uint16(data, index) |
| 325 | - self.filename, index = read_zero_terminated_ansi_string(data, index) | |
| 352 | + self.filename, index = read_zero_terminated_string(data, index) | |
| 326 | 353 | # source path |
| 327 | - self.src_path, index = read_zero_terminated_ansi_string(data, index) | |
| 328 | - # TODO I bet these next 8 bytes are a timestamp => FILETIME from olefile | |
| 354 | + self.src_path, index = read_zero_terminated_string(data, index) | |
| 355 | + # TODO: I bet these 8 bytes are a timestamp ==> FILETIME from olefile | |
| 329 | 356 | self.unknown_long_1, index = read_uint32(data, index) |
| 330 | 357 | self.unknown_long_2, index = read_uint32(data, index) |
| 331 | 358 | # temp path? |
| 332 | - self.temp_path, index = read_zero_terminated_ansi_string(data, index) | |
| 359 | + self.temp_path, index = read_zero_terminated_string(data, index) | |
| 333 | 360 | # size of the rest of the data |
| 334 | 361 | try: |
| 335 | 362 | self.actual_size, index = read_uint32(data, index) |
| ... | ... | @@ -338,7 +365,7 @@ class OleNativeStream (object): |
| 338 | 365 | else: |
| 339 | 366 | self.data = data[index:index+self.actual_size] |
| 340 | 367 | self.is_link = False |
| 341 | - # TODO: exception when size > remaining data | |
| 368 | + # TODO: there can be extra data, no idea what it is for | |
| 342 | 369 | # TODO: SLACK DATA |
| 343 | 370 | except (IOError, struct.error): # no data to read actual_size |
| 344 | 371 | logging.debug('data is not embedded but only a link') |
| ... | ... | @@ -347,7 +374,7 @@ class OleNativeStream (object): |
| 347 | 374 | self.data = None |
| 348 | 375 | |
| 349 | 376 | |
| 350 | -class OleObject (object): | |
| 377 | +class OleObject(object): | |
| 351 | 378 | """ |
| 352 | 379 | OLE 1.0 Object |
| 353 | 380 | |
| ... | ... | @@ -359,13 +386,15 @@ class OleObject (object): |
| 359 | 386 | TYPE_LINKED = 0x01 |
| 360 | 387 | TYPE_EMBEDDED = 0x02 |
| 361 | 388 | |
| 362 | - | |
| 363 | 389 | def __init__(self, bindata=None): |
| 364 | 390 | """ |
| 365 | 391 | Constructor for OleObject. |
| 366 | 392 | If bindata is provided, it will be parsed using the parse() method. |
| 367 | 393 | |
| 368 | - :param bindata: bytes, OLE 1.0 Object structure containing an OLE object | |
| 394 | + :param bindata: bytes, OLE 1.0 Object structure containing OLE object | |
| 395 | + | |
| 396 | + Note: Code can easily by generalized to work with byte streams instead | |
| 397 | + of arrays just like in OleNativeStream. | |
| 369 | 398 | """ |
| 370 | 399 | self.ole_version = None |
| 371 | 400 | self.format_id = None |
| ... | ... | @@ -374,6 +403,8 @@ class OleObject (object): |
| 374 | 403 | self.item_name = None |
| 375 | 404 | self.data = None |
| 376 | 405 | self.data_size = None |
| 406 | + if bindata is not None: | |
| 407 | + self.parse(bindata) | |
| 377 | 408 | |
| 378 | 409 | def parse(self, data): |
| 379 | 410 | """ |
| ... | ... | @@ -388,27 +419,29 @@ class OleObject (object): |
| 388 | 419 | # print("Parsing OLE object data:") |
| 389 | 420 | # print(hexdump3(data, length=16)) |
| 390 | 421 | # Header: see MS-OLEDS 2.2.4 ObjectHeader |
| 422 | + index = 0 | |
| 391 | 423 | self.ole_version, index = read_uint32(data, index) |
| 392 | 424 | self.format_id, index = read_uint32(data, index) |
| 393 | - log.debug('OLE version=%08X - Format ID=%08X' % (self.ole_version, self.format_id)) | |
| 425 | + log.debug('OLE version=%08X - Format ID=%08X' | |
| 426 | + % (self.ole_version, self.format_id)) | |
| 394 | 427 | assert self.format_id in (self.TYPE_EMBEDDED, self.TYPE_LINKED) |
| 395 | - self.class_name, index = read_LengthPrefixedAnsiString(data, index) | |
| 396 | - self.topic_name, index = read_LengthPrefixedAnsiString(data, index) | |
| 397 | - self.item_name, index = read_LengthPrefixedAnsiString(data, index) | |
| 428 | + self.class_name, index = read_length_prefixed_string(data, index) | |
| 429 | + self.topic_name, index = read_length_prefixed_string(data, index) | |
| 430 | + self.item_name, index = read_length_prefixed_string(data, index) | |
| 398 | 431 | log.debug('Class name=%r - Topic name=%r - Item name=%r' |
| 399 | - % (self.class_name, self.topic_name, self.item_name)) | |
| 432 | + % (self.class_name, self.topic_name, self.item_name)) | |
| 400 | 433 | if self.format_id == self.TYPE_EMBEDDED: |
| 401 | 434 | # Embedded object: see MS-OLEDS 2.2.5 EmbeddedObject |
| 402 | - #assert self.topic_name != '' and self.item_name != '' | |
| 435 | + # assert self.topic_name != '' and self.item_name != '' | |
| 403 | 436 | self.data_size, index = read_uint32(data, index) |
| 404 | - log.debug('Declared data size=%d - remaining size=%d' % (self.data_size, len(data)-index)) | |
| 437 | + log.debug('Declared data size=%d - remaining size=%d' | |
| 438 | + % (self.data_size, len(data)-index)) | |
| 405 | 439 | # TODO: handle incorrect size to avoid exception |
| 406 | 440 | self.data = data[index:index+self.data_size] |
| 407 | 441 | assert len(self.data) == self.data_size |
| 408 | 442 | self.extra_data = data[index+self.data_size:] |
| 409 | 443 | |
| 410 | 444 | |
| 411 | - | |
| 412 | 445 | def sanitize_filename(filename, replacement='_', max_length=200): |
| 413 | 446 | """compute basename of filename. Replaces all non-whitelisted characters. |
| 414 | 447 | The returned filename is always a basename of the file.""" |
| ... | ... | @@ -421,7 +454,7 @@ def sanitize_filename(filename, replacement='_', max_length=200): |
| 421 | 454 | while " " in sane_fname: |
| 422 | 455 | sane_fname = sane_fname.replace(' ', ' ') |
| 423 | 456 | |
| 424 | - if not len(filename): | |
| 457 | + if not filename: | |
| 425 | 458 | sane_fname = 'NONAME' |
| 426 | 459 | |
| 427 | 460 | # limit filename length |
| ... | ... | @@ -507,7 +540,7 @@ def find_ole(filename, data): |
| 507 | 540 | yield None # --> leads to non-0 return code but try next file first |
| 508 | 541 | |
| 509 | 542 | |
| 510 | -def process_file(container, filename, data, output_dir=None): | |
| 543 | +def process_file(filename, data, output_dir=None): | |
| 511 | 544 | """ find embedded objects in given file |
| 512 | 545 | |
| 513 | 546 | if data is given (from xglob for encrypted zip files), then filename is |
| ... | ... | @@ -530,8 +563,8 @@ def process_file(container, filename, data, output_dir=None): |
| 530 | 563 | fname_prefix = os.path.join(base_dir, sane_fname) |
| 531 | 564 | |
| 532 | 565 | # TODO: option to extract objects to files (false by default) |
| 533 | - print ('-'*79) | |
| 534 | - print ('File: %r' % filename) | |
| 566 | + print('-'*79) | |
| 567 | + print('File: %r' % filename) | |
| 535 | 568 | index = 1 |
| 536 | 569 | |
| 537 | 570 | # do not throw errors but remember them and try continue with other streams |
| ... | ... | @@ -553,10 +586,10 @@ def process_file(container, filename, data, output_dir=None): |
| 553 | 586 | stream = ole.openstream(path_parts) |
| 554 | 587 | print('extract file embedded in OLE object from stream %r:' |
| 555 | 588 | % stream_path) |
| 556 | - print ('Parsing OLE Package') | |
| 589 | + print('Parsing OLE Package') | |
| 557 | 590 | opkg = OleNativeStream(stream) |
| 558 | 591 | # leave stream open until dumping is finished |
| 559 | - except Exception: | |
| 592 | + except Exception as exc: | |
| 560 | 593 | log.warning('*** Not an OLE 1.0 Object ({0})'.format(exc)) |
| 561 | 594 | err_stream = True |
| 562 | 595 | if stream is not None: |
| ... | ... | @@ -568,9 +601,9 @@ def process_file(container, filename, data, output_dir=None): |
| 568 | 601 | log.debug('Object is not embedded but only linked to ' |
| 569 | 602 | '- skip') |
| 570 | 603 | continue |
| 571 | - print ('Filename = %r' % opkg.filename) | |
| 572 | - print ('Source path = %r' % opkg.src_path) | |
| 573 | - print ('Temp path = %r' % opkg.temp_path) | |
| 604 | + print('Filename = %r' % opkg.filename) | |
| 605 | + print('Source path = %r' % opkg.src_path) | |
| 606 | + print('Temp path = %r' % opkg.temp_path) | |
| 574 | 607 | if opkg.filename: |
| 575 | 608 | fname = '%s_%s' % (fname_prefix, |
| 576 | 609 | sanitize_filename(opkg.filename)) |
| ... | ... | @@ -579,10 +612,10 @@ def process_file(container, filename, data, output_dir=None): |
| 579 | 612 | |
| 580 | 613 | # dump |
| 581 | 614 | try: |
| 582 | - print ('saving to file %s' % fname) | |
| 615 | + print('saving to file %s' % fname) | |
| 583 | 616 | with open(fname, 'wb') as writer: |
| 584 | 617 | n_dumped = 0 |
| 585 | - next_size = min(DUMP_CHUNK_SIZE, opkg.actual_size) | |
| 618 | + next_size = min(DUMP_CHUNK_SIZE, opkg.actual_size) | |
| 586 | 619 | while next_size: |
| 587 | 620 | data = stream.read(next_size) |
| 588 | 621 | writer.write(data) |
| ... | ... | @@ -591,8 +624,8 @@ def process_file(container, filename, data, output_dir=None): |
| 591 | 624 | logging.warning('Wanted to read {0}, got {1}' |
| 592 | 625 | .format(next_size, len(data))) |
| 593 | 626 | break |
| 594 | - next_size = min(DUMP_CHUNK_SIZE, | |
| 595 | - opkg.actual_size - n_dumped) | |
| 627 | + next_size = min(DUMP_CHUNK_SIZE, | |
| 628 | + opkg.actual_size - n_dumped) | |
| 596 | 629 | did_dump = True |
| 597 | 630 | except Exception as exc: |
| 598 | 631 | log.warning('error dumping to {0} ({1})' |
| ... | ... | @@ -605,23 +638,17 @@ def process_file(container, filename, data, output_dir=None): |
| 605 | 638 | return err_stream, err_dumping, did_dump |
| 606 | 639 | |
| 607 | 640 | |
| 608 | -#=== MAIN ================================================================= | |
| 641 | +# === MAIN ==================================================================== | |
| 642 | + | |
| 609 | 643 | |
| 610 | 644 | def main(): |
| 611 | 645 | """ main function, called when running this as script """ |
| 612 | 646 | # print banner with version |
| 613 | - print ('oleobj %s - http://decalage.info/oletools' % __version__) | |
| 614 | - print ('THIS IS WORK IN PROGRESS - Check updates regularly!') | |
| 615 | - print ('Please report any issue at https://github.com/decalage2/oletools/issues') | |
| 616 | - print ('') | |
| 617 | - | |
| 618 | - DEFAULT_LOG_LEVEL = "warning" # Default log level | |
| 619 | - LOG_LEVELS = {'debug': logging.DEBUG, | |
| 620 | - 'info': logging.INFO, | |
| 621 | - 'warning': logging.WARNING, | |
| 622 | - 'error': logging.ERROR, | |
| 623 | - 'critical': logging.CRITICAL | |
| 624 | - } | |
| 647 | + print('oleobj %s - http://decalage.info/oletools' % __version__) | |
| 648 | + print('THIS IS WORK IN PROGRESS - Check updates regularly!') | |
| 649 | + print('Please report any issue at ' | |
| 650 | + 'https://github.com/decalage2/oletools/issues') | |
| 651 | + print('') | |
| 625 | 652 | |
| 626 | 653 | usage = 'usage: %prog [options] <filename> [filename2 ...]' |
| 627 | 654 | parser = optparse.OptionParser(usage=usage) |
| ... | ... | @@ -630,21 +657,31 @@ def main(): |
| 630 | 657 | # parser.add_option('-c', '--csv', dest='csv', |
| 631 | 658 | # help='export results to a CSV file') |
| 632 | 659 | parser.add_option("-r", action="store_true", dest="recursive", |
| 633 | - help='find files recursively in subdirectories.') | |
| 634 | - parser.add_option("-d", type="str", dest="output_dir", | |
| 635 | - help='use specified directory to output files.', default=None) | |
| 636 | - parser.add_option("-z", "--zip", dest='zip_password', type='str', default=None, | |
| 637 | - help='if the file is a zip archive, open first file from it, using the provided password (requires Python 2.6+)') | |
| 638 | - parser.add_option("-f", "--zipfname", dest='zip_fname', type='str', default='*', | |
| 639 | - help='if the file is a zip archive, file(s) to be opened within the zip. Wildcards * and ? are supported. (default:*)') | |
| 640 | - parser.add_option('-l', '--loglevel', dest="loglevel", action="store", default=DEFAULT_LOG_LEVEL, | |
| 641 | - help="logging level debug/info/warning/error/critical (default=%default)") | |
| 660 | + help='find files recursively in subdirectories.') | |
| 661 | + parser.add_option("-d", type="str", dest="output_dir", default=None, | |
| 662 | + help='use specified directory to output files.') | |
| 663 | + parser.add_option("-z", "--zip", dest='zip_password', type='str', | |
| 664 | + default=None, | |
| 665 | + help='if the file is a zip archive, open first file from' | |
| 666 | + 'it, using the provided password (requires Python ' | |
| 667 | + '2.6+)') | |
| 668 | + parser.add_option("-f", "--zipfname", dest='zip_fname', type='str', | |
| 669 | + default='*', | |
| 670 | + help='if the file is a zip archive, file(s) to be opened' | |
| 671 | + 'within the zip. Wildcards * and ? are supported. ' | |
| 672 | + '(default:*)') | |
| 673 | + parser.add_option('-l', '--loglevel', dest="loglevel", action="store", | |
| 674 | + default=DEFAULT_LOG_LEVEL, | |
| 675 | + help='logging level debug/info/warning/error/critical ' | |
| 676 | + '(default=%default)') | |
| 642 | 677 | |
| 643 | 678 | # options for compatibility with ripOLE |
| 644 | 679 | parser.add_option('-i', '--more-input', type='str', default=None, |
| 645 | - help='Additional file to parse (same as positional arguments)') | |
| 680 | + help='Additional file to parse (same as positional ' | |
| 681 | + 'arguments)') | |
| 646 | 682 | parser.add_option('-v', '--verbose', action='store_true', |
| 647 | - help='verbose mode, set logging to DEBUG (overwrites -l)') | |
| 683 | + help='verbose mode, set logging to DEBUG ' | |
| 684 | + '(overwrites -l)') | |
| 648 | 685 | |
| 649 | 686 | (options, args) = parser.parse_args() |
| 650 | 687 | if options.more_input: |
| ... | ... | @@ -653,7 +690,7 @@ def main(): |
| 653 | 690 | options.loglevel = 'debug' |
| 654 | 691 | |
| 655 | 692 | # Print help if no arguments are passed |
| 656 | - if len(args) == 0: | |
| 693 | + if not args: | |
| 657 | 694 | parser.print_help() |
| 658 | 695 | return RETURN_ERR_ARGS |
| 659 | 696 | for filename in args: |
| ... | ... | @@ -674,13 +711,15 @@ def main(): |
| 674 | 711 | any_err_dumping = False |
| 675 | 712 | any_did_dump = False |
| 676 | 713 | |
| 677 | - for container, filename, data in xglob.iter_files(args, recursive=options.recursive, | |
| 678 | - zip_password=options.zip_password, zip_fname=options.zip_fname): | |
| 714 | + for container, filename, data in \ | |
| 715 | + xglob.iter_files(args, recursive=options.recursive, | |
| 716 | + zip_password=options.zip_password, | |
| 717 | + zip_fname=options.zip_fname): | |
| 679 | 718 | # ignore directory names stored in zip files: |
| 680 | 719 | if container and filename.endswith('/'): |
| 681 | 720 | continue |
| 682 | 721 | err_stream, err_dumping, did_dump = \ |
| 683 | - process_file(container, filename, data, options.output_dir) | |
| 722 | + process_file(filename, data, options.output_dir) | |
| 684 | 723 | any_err_stream |= err_stream |
| 685 | 724 | any_err_dumping |= err_dumping |
| 686 | 725 | any_did_dump |= did_dump | ... | ... |