Commit 670d70755885e7e8c5c0e012ee23db95365d6121

Authored by Christian Herdtweck
1 parent 1665aeea

oleobj: make pylint and pep8 happier

Most changes are just whitespace or line break or case changes. But:
- this did find an actual error (the variable exc was used before assignment)
- moved imports up between license and changelog (although I would have
preferred to keep them in their original place)
- removed the _ansi_ from read_*_ansi_string
- moved logging constants from main to global scope
Showing 1 changed file with 160 additions and 121 deletions
oletools/oleobj.py
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2 -from __future__ import print_function  
3 """ 2 """
4 oleobj.py 3 oleobj.py
5 4
6 oleobj is a Python script and module to parse OLE objects and files stored 5 oleobj is a Python script and module to parse OLE objects and files stored
7 -into various file formats such as RTF or MS Office documents (e.g. Word, Excel). 6 +into various file formats such as RTF or MS Office documents
  7 +(e.g. Word, Excel).
8 8
9 Author: Philippe Lagadec - http://www.decalage.info 9 Author: Philippe Lagadec - http://www.decalage.info
10 License: BSD, see source code or documentation 10 License: BSD, see source code or documentation
@@ -13,33 +13,63 @@ oleobj is part of the python-oletools package: @@ -13,33 +13,63 @@ oleobj is part of the python-oletools package:
13 http://www.decalage.info/python/oletools 13 http://www.decalage.info/python/oletools
14 """ 14 """
15 15
16 -# === LICENSE ================================================================== 16 +# === LICENSE =================================================================
17 17
18 # oleobj is copyright (c) 2015-2017 Philippe Lagadec (http://www.decalage.info) 18 # oleobj is copyright (c) 2015-2017 Philippe Lagadec (http://www.decalage.info)
19 # All rights reserved. 19 # All rights reserved.
20 # 20 #
21 -# Redistribution and use in source and binary forms, with or without modification,  
22 -# are permitted provided that the following conditions are met: 21 +# Redistribution and use in source and binary forms, with or without
  22 +# modification, are permitted provided that the following conditions are met:
23 # 23 #
24 -# * Redistributions of source code must retain the above copyright notice, this  
25 -# list of conditions and the following disclaimer. 24 +# * Redistributions of source code must retain the above copyright notice,
  25 +# this list of conditions and the following disclaimer.
26 # * Redistributions in binary form must reproduce the above copyright notice, 26 # * Redistributions in binary form must reproduce the above copyright notice,
27 # this list of conditions and the following disclaimer in the documentation 27 # this list of conditions and the following disclaimer in the documentation
28 # and/or other materials provided with the distribution. 28 # and/or other materials provided with the distribution.
29 # 29 #
30 -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND  
31 -# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED  
32 -# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE  
33 -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE  
34 -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL  
35 -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR  
36 -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER  
37 -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,  
38 -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE  
39 -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.  
40 -  
41 -  
42 -#------------------------------------------------------------------------------ 30 +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  31 +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  32 +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  33 +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
  34 +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  35 +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  36 +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  37 +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  38 +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  39 +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  40 +# POSSIBILITY OF SUCH DAMAGE.
  41 +
  42 +
  43 +# -- IMPORTS ------------------------------------------------------------------
  44 +
  45 +from __future__ import print_function
  46 +
  47 +import logging
  48 +import struct
  49 +import optparse
  50 +import os
  51 +import re
  52 +import sys
  53 +from zipfile import is_zipfile, ZipFile
  54 +
  55 +# IMPORTANT: it should be possible to run oletools directly as scripts
  56 +# in any directory without installing them with pip or setup.py.
  57 +# In that case, relative imports are NOT usable.
  58 +# And to enable Python 2+3 compatibility, we need to use absolute imports,
  59 +# so we add the oletools parent folder to sys.path (absolute+normalized path):
  60 +try:
  61 + from oletools.thirdparty import olefile
  62 +except ImportError:
  63 + PARENT_DIR = os.path.normpath(os.path.dirname(os.path.dirname(
  64 + os.path.abspath(__file__))))
  65 + if PARENT_DIR not in sys.path:
  66 + sys.path.insert(0, PARENT_DIR)
  67 + del PARENT_DIR
  68 + from oletools.thirdparty import olefile
  69 +from oletools.thirdparty import xglob
  70 +from ppt_record_parser import is_ppt, PptFile, PptRecordExOleVbaActiveXAtom
  71 +
  72 +# -----------------------------------------------------------------------------
43 # CHANGELOG: 73 # CHANGELOG:
44 # 2015-12-05 v0.01 PL: - first version 74 # 2015-12-05 v0.01 PL: - first version
45 # 2016-06 PL: - added main and process_file (not working yet) 75 # 2016-06 PL: - added main and process_file (not working yet)
@@ -51,12 +81,12 @@ http://www.decalage.info/python/oletools @@ -51,12 +81,12 @@ http://www.decalage.info/python/oletools
51 81
52 __version__ = '0.51' 82 __version__ = '0.51'
53 83
54 -#------------------------------------------------------------------------------ 84 +# -----------------------------------------------------------------------------
55 # TODO: 85 # TODO:
56 # + setup logging (common with other oletools) 86 # + setup logging (common with other oletools)
57 87
58 88
59 -#------------------------------------------------------------------------------ 89 +# -----------------------------------------------------------------------------
60 # REFERENCES: 90 # REFERENCES:
61 91
62 # Reference for the storage of embedded OLE objects/files: 92 # Reference for the storage of embedded OLE objects/files:
@@ -67,38 +97,28 @@ __version__ = '0.51' @@ -67,38 +97,28 @@ __version__ = '0.51'
67 # TODO: oledump 97 # TODO: oledump
68 98
69 99
70 -#--- IMPORTS ------------------------------------------------------------------  
71 -  
72 -import logging, struct, optparse, os, re, sys 100 +# === LOGGING =================================================================
73 101
74 -# IMPORTANT: it should be possible to run oletools directly as scripts  
75 -# in any directory without installing them with pip or setup.py.  
76 -# In that case, relative imports are NOT usable.  
77 -# And to enable Python 2+3 compatibility, we need to use absolute imports,  
78 -# so we add the oletools parent folder to sys.path (absolute+normalized path):  
79 -_thismodule_dir = os.path.normpath(os.path.abspath(os.path.dirname(__file__)))  
80 -# print('_thismodule_dir = %r' % _thismodule_dir)  
81 -_parent_dir = os.path.normpath(os.path.join(_thismodule_dir, '..'))  
82 -# print('_parent_dir = %r' % _thirdparty_dir)  
83 -if not _parent_dir in sys.path:  
84 - sys.path.insert(0, _parent_dir)  
85 -  
86 -from oletools.thirdparty.olefile import olefile  
87 -from oletools.thirdparty.xglob import xglob  
88 -from ppt_record_parser import is_ppt, PptFile, PptRecordExOleVbaActiveXAtom 102 +DEFAULT_LOG_LEVEL = "warning"
  103 +LOG_LEVELS = {'debug': logging.DEBUG,
  104 + 'info': logging.INFO,
  105 + 'warning': logging.WARNING,
  106 + 'error': logging.ERROR,
  107 + 'critical': logging.CRITICAL}
89 108
90 -# === LOGGING =================================================================  
91 109
92 class NullHandler(logging.Handler): 110 class NullHandler(logging.Handler):
93 """ 111 """
94 Log Handler without output, to avoid printing messages if logging is not 112 Log Handler without output, to avoid printing messages if logging is not
95 configured by the main application. 113 configured by the main application.
96 Python 2.7 has logging.NullHandler, but this is necessary for 2.6: 114 Python 2.7 has logging.NullHandler, but this is necessary for 2.6:
97 - see https://docs.python.org/2.6/library/logging.html#configuring-logging-for-a-library 115 + see https://docs.python.org/2.6/library/logging.html section
  116 + configuring-logging-for-a-library
98 """ 117 """
99 def emit(self, record): 118 def emit(self, record):
100 pass 119 pass
101 120
  121 +
102 def get_logger(name, level=logging.CRITICAL+1): 122 def get_logger(name, level=logging.CRITICAL+1):
103 """ 123 """
104 Create a suitable logger object for this module. 124 Create a suitable logger object for this module.
@@ -111,7 +131,7 @@ def get_logger(name, level=logging.CRITICAL+1): @@ -111,7 +131,7 @@ def get_logger(name, level=logging.CRITICAL+1):
111 # First, test if there is already a logger with the same name, else it 131 # First, test if there is already a logger with the same name, else it
112 # will generate duplicate messages (due to duplicate handlers): 132 # will generate duplicate messages (due to duplicate handlers):
113 if name in logging.Logger.manager.loggerDict: 133 if name in logging.Logger.manager.loggerDict:
114 - #NOTE: another less intrusive but more "hackish" solution would be to 134 + # NOTE: another less intrusive but more "hackish" solution would be to
115 # use getLogger then test if its effective level is not default. 135 # use getLogger then test if its effective level is not default.
116 logger = logging.getLogger(name) 136 logger = logging.getLogger(name)
117 # make sure level is OK: 137 # make sure level is OK:
@@ -125,8 +145,10 @@ def get_logger(name, level=logging.CRITICAL+1): @@ -125,8 +145,10 @@ def get_logger(name, level=logging.CRITICAL+1):
125 logger.setLevel(level) 145 logger.setLevel(level)
126 return logger 146 return logger
127 147
  148 +
128 # a global logger object used for debugging: 149 # a global logger object used for debugging:
129 -log = get_logger('oleobj') 150 +log = get_logger('oleobj') # pylint: disable=invalid-name
  151 +
130 152
131 def enable_logging(): 153 def enable_logging():
132 """ 154 """
@@ -137,7 +159,7 @@ def enable_logging(): @@ -137,7 +159,7 @@ def enable_logging():
137 log.setLevel(logging.NOTSET) 159 log.setLevel(logging.NOTSET)
138 160
139 161
140 -# === CONSTANTS ============================================================== 162 +# === CONSTANTS ===============================================================
141 163
142 # some str methods on Python 2.x return characters, 164 # some str methods on Python 2.x return characters,
143 # while the equivalent bytes methods return integers on Python 3.x: 165 # while the equivalent bytes methods return integers on Python 3.x:
@@ -146,18 +168,19 @@ if sys.version_info[0] <= 2: @@ -146,18 +168,19 @@ if sys.version_info[0] <= 2:
146 NULL_CHAR = '\x00' 168 NULL_CHAR = '\x00'
147 else: 169 else:
148 # Python 3.x 170 # Python 3.x
149 - NULL_CHAR = 0 171 + NULL_CHAR = 0 # pylint: disable=redefined-variable-type
  172 + xrange = range # pylint: disable=redefined-builtin, invalid-name
150 173
151 174
152 -# === GLOBAL VARIABLES ======================================================= 175 +# === GLOBAL VARIABLES ========================================================
153 176
154 # struct to parse an unsigned integer of 32 bits: 177 # struct to parse an unsigned integer of 32 bits:
155 -struct_uint32 = struct.Struct('<L')  
156 -assert struct_uint32.size == 4 # make sure it matches 4 bytes 178 +STRUCT_UINT32 = struct.Struct('<L')
  179 +assert STRUCT_UINT32.size == 4 # make sure it matches 4 bytes
157 180
158 # struct to parse an unsigned integer of 16 bits: 181 # struct to parse an unsigned integer of 16 bits:
159 -struct_uint16 = struct.Struct('<H')  
160 -assert struct_uint16.size == 2 # make sure it matches 2 bytes 182 +STRUCT_UINT16 = struct.Struct('<H')
  183 +assert STRUCT_UINT16.size == 2 # make sure it matches 2 bytes
161 184
162 # max length of a zero-terminated ansi string. Not sure what this really is 185 # max length of a zero-terminated ansi string. Not sure what this really is
163 STR_MAX_LEN = 1024 186 STR_MAX_LEN = 1024
@@ -173,7 +196,9 @@ RETURN_ERR_ARGS = 2 # reserve for OptionParser.parse_args @@ -173,7 +196,9 @@ RETURN_ERR_ARGS = 2 # reserve for OptionParser.parse_args
173 RETURN_ERR_STREAM = 4 # error opening/parsing a stream 196 RETURN_ERR_STREAM = 4 # error opening/parsing a stream
174 RETURN_ERR_DUMP = 8 # error dumping data from stream to file 197 RETURN_ERR_DUMP = 8 # error dumping data from stream to file
175 198
176 -# === FUNCTIONS ============================================================== 199 +
  200 +# === FUNCTIONS ===============================================================
  201 +
177 202
178 def read_uint32(data, index): 203 def read_uint32(data, index):
179 """ 204 """
@@ -185,9 +210,9 @@ def read_uint32(data, index): @@ -185,9 +210,9 @@ def read_uint32(data, index):
185 and the index to continue reading next time. 210 and the index to continue reading next time.
186 """ 211 """
187 if index is None: 212 if index is None:
188 - value = struct_uint32.unpack(data.read(4))[0] 213 + value = STRUCT_UINT32.unpack(data.read(4))[0]
189 else: 214 else:
190 - value = struct_uint32.unpack(data[index:index+4])[0] 215 + value = STRUCT_UINT32.unpack(data[index:index+4])[0]
191 index += 4 216 index += 4
192 return (value, index) 217 return (value, index)
193 218
@@ -202,19 +227,20 @@ def read_uint16(data, index): @@ -202,19 +227,20 @@ def read_uint16(data, index):
202 and the index to continue reading next time. 227 and the index to continue reading next time.
203 """ 228 """
204 if index is None: 229 if index is None:
205 - value = struct_uint16.unpack(data.read(2))[0] 230 + value = STRUCT_UINT16.unpack(data.read(2))[0]
206 else: 231 else:
207 - value = struct_uint16.unpack(data[index:index+2])[0] 232 + value = STRUCT_UINT16.unpack(data[index:index+2])[0]
208 index += 2 233 index += 2
209 return (value, index) 234 return (value, index)
210 235
211 236
212 -def read_LengthPrefixedAnsiString(data, index): 237 +def read_length_prefixed_string(data, index):
213 """ 238 """
214 Read a length-prefixed ANSI string from data. 239 Read a length-prefixed ANSI string from data.
215 240
216 :param data: bytes string or stream containing the data to be extracted. 241 :param data: bytes string or stream containing the data to be extracted.
217 - :param index: index in data where string size start or None if data is stream 242 + :param index: index in data where string size start or None if data is
  243 + stream
218 :return: tuple (value, index) containing the read value (bytes string), 244 :return: tuple (value, index) containing the read value (bytes string),
219 and the index to start reading from next time. 245 and the index to start reading from next time.
220 """ 246 """
@@ -236,20 +262,21 @@ def read_LengthPrefixedAnsiString(data, index): @@ -236,20 +262,21 @@ def read_LengthPrefixedAnsiString(data, index):
236 return (ansi_string, index) 262 return (ansi_string, index)
237 263
238 264
239 -def read_zero_terminated_ansi_string(data, index): 265 +def read_zero_terminated_string(data, index):
240 """ 266 """
241 Read a zero-terminated ANSI string from data 267 Read a zero-terminated ANSI string from data
242 268
243 Guessing that max length is 256 bytes. 269 Guessing that max length is 256 bytes.
244 270
245 :param data: bytes string or stream containing an ansi string 271 :param data: bytes string or stream containing an ansi string
246 - :param index: index at which the string should start or None if data is stream 272 + :param index: index at which the string should start or None if data is
  273 + stream
247 :return: tuple (string, index) containing the read string (bytes string), 274 :return: tuple (string, index) containing the read string (bytes string),
248 and the index to start reading from next time. 275 and the index to start reading from next time.
249 """ 276 """
250 if index is None: 277 if index is None:
251 result = [] 278 result = []
252 - for count in xrange(STR_MAX_LEN): 279 + for _ in xrange(STR_MAX_LEN):
253 char = data.read(1) 280 char = data.read(1)
254 if char == b'\x00': 281 if char == b'\x00':
255 return b''.join(result), index 282 return b''.join(result), index
@@ -260,9 +287,10 @@ def read_zero_terminated_ansi_string(data, index): @@ -260,9 +287,10 @@ def read_zero_terminated_ansi_string(data, index):
260 return data[index:end_idx], end_idx+1 # return index after the 0-byte 287 return data[index:end_idx], end_idx+1 # return index after the 0-byte
261 288
262 289
263 -# === CLASSES ================================================================ 290 +# === CLASSES =================================================================
264 291
265 -class OleNativeStream (object): 292 +
  293 +class OleNativeStream(object):
266 """ 294 """
267 OLE object contained into an OLENativeStream structure. 295 OLE object contained into an OLENativeStream structure.
268 (see MS-OLEDS 2.3.6 OLENativeStream) 296 (see MS-OLEDS 2.3.6 OLENativeStream)
@@ -272,7 +300,6 @@ class OleNativeStream (object): @@ -272,7 +300,6 @@ class OleNativeStream (object):
272 TYPE_LINKED = 0x01 300 TYPE_LINKED = 0x01
273 TYPE_EMBEDDED = 0x02 301 TYPE_EMBEDDED = 0x02
274 302
275 -  
276 def __init__(self, bindata=None, package=False): 303 def __init__(self, bindata=None, package=False):
277 """ 304 """
278 Constructor for OleNativeStream. 305 Constructor for OleNativeStream.
@@ -322,14 +349,14 @@ class OleNativeStream (object): @@ -322,14 +349,14 @@ class OleNativeStream (object):
322 .format(self.native_data_size)) 349 .format(self.native_data_size))
323 # I thought this might be an OLE type specifier ??? 350 # I thought this might be an OLE type specifier ???
324 self.unknown_short, index = read_uint16(data, index) 351 self.unknown_short, index = read_uint16(data, index)
325 - self.filename, index = read_zero_terminated_ansi_string(data, index) 352 + self.filename, index = read_zero_terminated_string(data, index)
326 # source path 353 # source path
327 - self.src_path, index = read_zero_terminated_ansi_string(data, index)  
328 - # TODO I bet these next 8 bytes are a timestamp => FILETIME from olefile 354 + self.src_path, index = read_zero_terminated_string(data, index)
  355 + # TODO: I bet these 8 bytes are a timestamp ==> FILETIME from olefile
329 self.unknown_long_1, index = read_uint32(data, index) 356 self.unknown_long_1, index = read_uint32(data, index)
330 self.unknown_long_2, index = read_uint32(data, index) 357 self.unknown_long_2, index = read_uint32(data, index)
331 # temp path? 358 # temp path?
332 - self.temp_path, index = read_zero_terminated_ansi_string(data, index) 359 + self.temp_path, index = read_zero_terminated_string(data, index)
333 # size of the rest of the data 360 # size of the rest of the data
334 try: 361 try:
335 self.actual_size, index = read_uint32(data, index) 362 self.actual_size, index = read_uint32(data, index)
@@ -338,7 +365,7 @@ class OleNativeStream (object): @@ -338,7 +365,7 @@ class OleNativeStream (object):
338 else: 365 else:
339 self.data = data[index:index+self.actual_size] 366 self.data = data[index:index+self.actual_size]
340 self.is_link = False 367 self.is_link = False
341 - # TODO: exception when size > remaining data 368 + # TODO: there can be extra data, no idea what it is for
342 # TODO: SLACK DATA 369 # TODO: SLACK DATA
343 except (IOError, struct.error): # no data to read actual_size 370 except (IOError, struct.error): # no data to read actual_size
344 logging.debug('data is not embedded but only a link') 371 logging.debug('data is not embedded but only a link')
@@ -347,7 +374,7 @@ class OleNativeStream (object): @@ -347,7 +374,7 @@ class OleNativeStream (object):
347 self.data = None 374 self.data = None
348 375
349 376
350 -class OleObject (object): 377 +class OleObject(object):
351 """ 378 """
352 OLE 1.0 Object 379 OLE 1.0 Object
353 380
@@ -359,13 +386,15 @@ class OleObject (object): @@ -359,13 +386,15 @@ class OleObject (object):
359 TYPE_LINKED = 0x01 386 TYPE_LINKED = 0x01
360 TYPE_EMBEDDED = 0x02 387 TYPE_EMBEDDED = 0x02
361 388
362 -  
363 def __init__(self, bindata=None): 389 def __init__(self, bindata=None):
364 """ 390 """
365 Constructor for OleObject. 391 Constructor for OleObject.
366 If bindata is provided, it will be parsed using the parse() method. 392 If bindata is provided, it will be parsed using the parse() method.
367 393
368 - :param bindata: bytes, OLE 1.0 Object structure containing an OLE object 394 + :param bindata: bytes, OLE 1.0 Object structure containing OLE object
  395 +
  396 + Note: Code can easily by generalized to work with byte streams instead
  397 + of arrays just like in OleNativeStream.
369 """ 398 """
370 self.ole_version = None 399 self.ole_version = None
371 self.format_id = None 400 self.format_id = None
@@ -374,6 +403,8 @@ class OleObject (object): @@ -374,6 +403,8 @@ class OleObject (object):
374 self.item_name = None 403 self.item_name = None
375 self.data = None 404 self.data = None
376 self.data_size = None 405 self.data_size = None
  406 + if bindata is not None:
  407 + self.parse(bindata)
377 408
378 def parse(self, data): 409 def parse(self, data):
379 """ 410 """
@@ -388,27 +419,29 @@ class OleObject (object): @@ -388,27 +419,29 @@ class OleObject (object):
388 # print("Parsing OLE object data:") 419 # print("Parsing OLE object data:")
389 # print(hexdump3(data, length=16)) 420 # print(hexdump3(data, length=16))
390 # Header: see MS-OLEDS 2.2.4 ObjectHeader 421 # Header: see MS-OLEDS 2.2.4 ObjectHeader
  422 + index = 0
391 self.ole_version, index = read_uint32(data, index) 423 self.ole_version, index = read_uint32(data, index)
392 self.format_id, index = read_uint32(data, index) 424 self.format_id, index = read_uint32(data, index)
393 - log.debug('OLE version=%08X - Format ID=%08X' % (self.ole_version, self.format_id)) 425 + log.debug('OLE version=%08X - Format ID=%08X'
  426 + % (self.ole_version, self.format_id))
394 assert self.format_id in (self.TYPE_EMBEDDED, self.TYPE_LINKED) 427 assert self.format_id in (self.TYPE_EMBEDDED, self.TYPE_LINKED)
395 - self.class_name, index = read_LengthPrefixedAnsiString(data, index)  
396 - self.topic_name, index = read_LengthPrefixedAnsiString(data, index)  
397 - self.item_name, index = read_LengthPrefixedAnsiString(data, index) 428 + self.class_name, index = read_length_prefixed_string(data, index)
  429 + self.topic_name, index = read_length_prefixed_string(data, index)
  430 + self.item_name, index = read_length_prefixed_string(data, index)
398 log.debug('Class name=%r - Topic name=%r - Item name=%r' 431 log.debug('Class name=%r - Topic name=%r - Item name=%r'
399 - % (self.class_name, self.topic_name, self.item_name)) 432 + % (self.class_name, self.topic_name, self.item_name))
400 if self.format_id == self.TYPE_EMBEDDED: 433 if self.format_id == self.TYPE_EMBEDDED:
401 # Embedded object: see MS-OLEDS 2.2.5 EmbeddedObject 434 # Embedded object: see MS-OLEDS 2.2.5 EmbeddedObject
402 - #assert self.topic_name != '' and self.item_name != '' 435 + # assert self.topic_name != '' and self.item_name != ''
403 self.data_size, index = read_uint32(data, index) 436 self.data_size, index = read_uint32(data, index)
404 - log.debug('Declared data size=%d - remaining size=%d' % (self.data_size, len(data)-index)) 437 + log.debug('Declared data size=%d - remaining size=%d'
  438 + % (self.data_size, len(data)-index))
405 # TODO: handle incorrect size to avoid exception 439 # TODO: handle incorrect size to avoid exception
406 self.data = data[index:index+self.data_size] 440 self.data = data[index:index+self.data_size]
407 assert len(self.data) == self.data_size 441 assert len(self.data) == self.data_size
408 self.extra_data = data[index+self.data_size:] 442 self.extra_data = data[index+self.data_size:]
409 443
410 444
411 -  
412 def sanitize_filename(filename, replacement='_', max_length=200): 445 def sanitize_filename(filename, replacement='_', max_length=200):
413 """compute basename of filename. Replaces all non-whitelisted characters. 446 """compute basename of filename. Replaces all non-whitelisted characters.
414 The returned filename is always a basename of the file.""" 447 The returned filename is always a basename of the file."""
@@ -421,7 +454,7 @@ def sanitize_filename(filename, replacement=&#39;_&#39;, max_length=200): @@ -421,7 +454,7 @@ def sanitize_filename(filename, replacement=&#39;_&#39;, max_length=200):
421 while " " in sane_fname: 454 while " " in sane_fname:
422 sane_fname = sane_fname.replace(' ', ' ') 455 sane_fname = sane_fname.replace(' ', ' ')
423 456
424 - if not len(filename): 457 + if not filename:
425 sane_fname = 'NONAME' 458 sane_fname = 'NONAME'
426 459
427 # limit filename length 460 # limit filename length
@@ -507,7 +540,7 @@ def find_ole(filename, data): @@ -507,7 +540,7 @@ def find_ole(filename, data):
507 yield None # --> leads to non-0 return code but try next file first 540 yield None # --> leads to non-0 return code but try next file first
508 541
509 542
510 -def process_file(container, filename, data, output_dir=None): 543 +def process_file(filename, data, output_dir=None):
511 """ find embedded objects in given file 544 """ find embedded objects in given file
512 545
513 if data is given (from xglob for encrypted zip files), then filename is 546 if data is given (from xglob for encrypted zip files), then filename is
@@ -530,8 +563,8 @@ def process_file(container, filename, data, output_dir=None): @@ -530,8 +563,8 @@ def process_file(container, filename, data, output_dir=None):
530 fname_prefix = os.path.join(base_dir, sane_fname) 563 fname_prefix = os.path.join(base_dir, sane_fname)
531 564
532 # TODO: option to extract objects to files (false by default) 565 # TODO: option to extract objects to files (false by default)
533 - print ('-'*79)  
534 - print ('File: %r' % filename) 566 + print('-'*79)
  567 + print('File: %r' % filename)
535 index = 1 568 index = 1
536 569
537 # do not throw errors but remember them and try continue with other streams 570 # do not throw errors but remember them and try continue with other streams
@@ -553,10 +586,10 @@ def process_file(container, filename, data, output_dir=None): @@ -553,10 +586,10 @@ def process_file(container, filename, data, output_dir=None):
553 stream = ole.openstream(path_parts) 586 stream = ole.openstream(path_parts)
554 print('extract file embedded in OLE object from stream %r:' 587 print('extract file embedded in OLE object from stream %r:'
555 % stream_path) 588 % stream_path)
556 - print ('Parsing OLE Package') 589 + print('Parsing OLE Package')
557 opkg = OleNativeStream(stream) 590 opkg = OleNativeStream(stream)
558 # leave stream open until dumping is finished 591 # leave stream open until dumping is finished
559 - except Exception: 592 + except Exception as exc:
560 log.warning('*** Not an OLE 1.0 Object ({0})'.format(exc)) 593 log.warning('*** Not an OLE 1.0 Object ({0})'.format(exc))
561 err_stream = True 594 err_stream = True
562 if stream is not None: 595 if stream is not None:
@@ -568,9 +601,9 @@ def process_file(container, filename, data, output_dir=None): @@ -568,9 +601,9 @@ def process_file(container, filename, data, output_dir=None):
568 log.debug('Object is not embedded but only linked to ' 601 log.debug('Object is not embedded but only linked to '
569 '- skip') 602 '- skip')
570 continue 603 continue
571 - print ('Filename = %r' % opkg.filename)  
572 - print ('Source path = %r' % opkg.src_path)  
573 - print ('Temp path = %r' % opkg.temp_path) 604 + print('Filename = %r' % opkg.filename)
  605 + print('Source path = %r' % opkg.src_path)
  606 + print('Temp path = %r' % opkg.temp_path)
574 if opkg.filename: 607 if opkg.filename:
575 fname = '%s_%s' % (fname_prefix, 608 fname = '%s_%s' % (fname_prefix,
576 sanitize_filename(opkg.filename)) 609 sanitize_filename(opkg.filename))
@@ -579,10 +612,10 @@ def process_file(container, filename, data, output_dir=None): @@ -579,10 +612,10 @@ def process_file(container, filename, data, output_dir=None):
579 612
580 # dump 613 # dump
581 try: 614 try:
582 - print ('saving to file %s' % fname) 615 + print('saving to file %s' % fname)
583 with open(fname, 'wb') as writer: 616 with open(fname, 'wb') as writer:
584 n_dumped = 0 617 n_dumped = 0
585 - next_size = min(DUMP_CHUNK_SIZE, opkg.actual_size) 618 + next_size = min(DUMP_CHUNK_SIZE, opkg.actual_size)
586 while next_size: 619 while next_size:
587 data = stream.read(next_size) 620 data = stream.read(next_size)
588 writer.write(data) 621 writer.write(data)
@@ -591,8 +624,8 @@ def process_file(container, filename, data, output_dir=None): @@ -591,8 +624,8 @@ def process_file(container, filename, data, output_dir=None):
591 logging.warning('Wanted to read {0}, got {1}' 624 logging.warning('Wanted to read {0}, got {1}'
592 .format(next_size, len(data))) 625 .format(next_size, len(data)))
593 break 626 break
594 - next_size = min(DUMP_CHUNK_SIZE,  
595 - opkg.actual_size - n_dumped) 627 + next_size = min(DUMP_CHUNK_SIZE,
  628 + opkg.actual_size - n_dumped)
596 did_dump = True 629 did_dump = True
597 except Exception as exc: 630 except Exception as exc:
598 log.warning('error dumping to {0} ({1})' 631 log.warning('error dumping to {0} ({1})'
@@ -605,23 +638,17 @@ def process_file(container, filename, data, output_dir=None): @@ -605,23 +638,17 @@ def process_file(container, filename, data, output_dir=None):
605 return err_stream, err_dumping, did_dump 638 return err_stream, err_dumping, did_dump
606 639
607 640
608 -#=== MAIN ================================================================= 641 +# === MAIN ====================================================================
  642 +
609 643
610 def main(): 644 def main():
611 """ main function, called when running this as script """ 645 """ main function, called when running this as script """
612 # print banner with version 646 # print banner with version
613 - print ('oleobj %s - http://decalage.info/oletools' % __version__)  
614 - print ('THIS IS WORK IN PROGRESS - Check updates regularly!')  
615 - print ('Please report any issue at https://github.com/decalage2/oletools/issues')  
616 - print ('')  
617 -  
618 - DEFAULT_LOG_LEVEL = "warning" # Default log level  
619 - LOG_LEVELS = {'debug': logging.DEBUG,  
620 - 'info': logging.INFO,  
621 - 'warning': logging.WARNING,  
622 - 'error': logging.ERROR,  
623 - 'critical': logging.CRITICAL  
624 - } 647 + print('oleobj %s - http://decalage.info/oletools' % __version__)
  648 + print('THIS IS WORK IN PROGRESS - Check updates regularly!')
  649 + print('Please report any issue at '
  650 + 'https://github.com/decalage2/oletools/issues')
  651 + print('')
625 652
626 usage = 'usage: %prog [options] <filename> [filename2 ...]' 653 usage = 'usage: %prog [options] <filename> [filename2 ...]'
627 parser = optparse.OptionParser(usage=usage) 654 parser = optparse.OptionParser(usage=usage)
@@ -630,21 +657,31 @@ def main(): @@ -630,21 +657,31 @@ def main():
630 # parser.add_option('-c', '--csv', dest='csv', 657 # parser.add_option('-c', '--csv', dest='csv',
631 # help='export results to a CSV file') 658 # help='export results to a CSV file')
632 parser.add_option("-r", action="store_true", dest="recursive", 659 parser.add_option("-r", action="store_true", dest="recursive",
633 - help='find files recursively in subdirectories.')  
634 - parser.add_option("-d", type="str", dest="output_dir",  
635 - help='use specified directory to output files.', default=None)  
636 - parser.add_option("-z", "--zip", dest='zip_password', type='str', default=None,  
637 - help='if the file is a zip archive, open first file from it, using the provided password (requires Python 2.6+)')  
638 - parser.add_option("-f", "--zipfname", dest='zip_fname', type='str', default='*',  
639 - help='if the file is a zip archive, file(s) to be opened within the zip. Wildcards * and ? are supported. (default:*)')  
640 - parser.add_option('-l', '--loglevel', dest="loglevel", action="store", default=DEFAULT_LOG_LEVEL,  
641 - help="logging level debug/info/warning/error/critical (default=%default)") 660 + help='find files recursively in subdirectories.')
  661 + parser.add_option("-d", type="str", dest="output_dir", default=None,
  662 + help='use specified directory to output files.')
  663 + parser.add_option("-z", "--zip", dest='zip_password', type='str',
  664 + default=None,
  665 + help='if the file is a zip archive, open first file from'
  666 + 'it, using the provided password (requires Python '
  667 + '2.6+)')
  668 + parser.add_option("-f", "--zipfname", dest='zip_fname', type='str',
  669 + default='*',
  670 + help='if the file is a zip archive, file(s) to be opened'
  671 + 'within the zip. Wildcards * and ? are supported. '
  672 + '(default:*)')
  673 + parser.add_option('-l', '--loglevel', dest="loglevel", action="store",
  674 + default=DEFAULT_LOG_LEVEL,
  675 + help='logging level debug/info/warning/error/critical '
  676 + '(default=%default)')
642 677
643 # options for compatibility with ripOLE 678 # options for compatibility with ripOLE
644 parser.add_option('-i', '--more-input', type='str', default=None, 679 parser.add_option('-i', '--more-input', type='str', default=None,
645 - help='Additional file to parse (same as positional arguments)') 680 + help='Additional file to parse (same as positional '
  681 + 'arguments)')
646 parser.add_option('-v', '--verbose', action='store_true', 682 parser.add_option('-v', '--verbose', action='store_true',
647 - help='verbose mode, set logging to DEBUG (overwrites -l)') 683 + help='verbose mode, set logging to DEBUG '
  684 + '(overwrites -l)')
648 685
649 (options, args) = parser.parse_args() 686 (options, args) = parser.parse_args()
650 if options.more_input: 687 if options.more_input:
@@ -653,7 +690,7 @@ def main(): @@ -653,7 +690,7 @@ def main():
653 options.loglevel = 'debug' 690 options.loglevel = 'debug'
654 691
655 # Print help if no arguments are passed 692 # Print help if no arguments are passed
656 - if len(args) == 0: 693 + if not args:
657 parser.print_help() 694 parser.print_help()
658 return RETURN_ERR_ARGS 695 return RETURN_ERR_ARGS
659 for filename in args: 696 for filename in args:
@@ -674,13 +711,15 @@ def main(): @@ -674,13 +711,15 @@ def main():
674 any_err_dumping = False 711 any_err_dumping = False
675 any_did_dump = False 712 any_did_dump = False
676 713
677 - for container, filename, data in xglob.iter_files(args, recursive=options.recursive,  
678 - zip_password=options.zip_password, zip_fname=options.zip_fname): 714 + for container, filename, data in \
  715 + xglob.iter_files(args, recursive=options.recursive,
  716 + zip_password=options.zip_password,
  717 + zip_fname=options.zip_fname):
679 # ignore directory names stored in zip files: 718 # ignore directory names stored in zip files:
680 if container and filename.endswith('/'): 719 if container and filename.endswith('/'):
681 continue 720 continue
682 err_stream, err_dumping, did_dump = \ 721 err_stream, err_dumping, did_dump = \
683 - process_file(container, filename, data, options.output_dir) 722 + process_file(filename, data, options.output_dir)
684 any_err_stream |= err_stream 723 any_err_stream |= err_stream
685 any_err_dumping |= err_dumping 724 any_err_dumping |= err_dumping
686 any_did_dump |= did_dump 725 any_did_dump |= did_dump