crypto.py
15.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
#!/usr/bin/env python
"""
crypto.py
Module to be used by other scripts and modules in oletools, that provides
information on encryption in OLE files.
Uses :py:mod:`msoffcrypto-tool` to decrypt if it is available. Otherwise
decryption will fail with an ImportError.
Encryption/Write-Protection can be realized in many different ways. They range
from setting a single flag in an otherwise unprotected file to embedding a
regular file (e.g. xlsx) in an EncryptedStream inside an OLE file. That means
that (1) lots of bad things are accessible even if no encryption password
is known, and (2) even basic attributes like the file type can change by
decryption. Therefore I suggest the following general routine to deal with
potentially encrypted files::
def script_main_function(input_file, passwords, crypto_nesting=0, args=None):
'''Wrapper around main function to deal with encrypted files.'''
initial_stuff(input_file, args)
result = None
try:
result = do_your_thing_assuming_no_encryption(input_file)
if not crypto.is_encrypted(input_file):
return result
except Exception:
if not crypto.is_encrypted(input_file):
raise
# we reach this point only if file is encrypted
# check if this is an encrypted file in an encrypted file in an ...
if crypto_nesting >= crypto.MAX_NESTING_DEPTH:
raise crypto.MaxCryptoNestingReached(crypto_nesting, input_file)
decrypted_file = None
try:
decrypted_file = crypto.decrypt(input_file, passwords)
# might still be encrypted, so call this again recursively
result = script_main_function(decrypted_file, passwords,
crypto_nesting+1, args)
except Exception:
raise
finally: # clean up
try: # (maybe file was not yet created)
os.unlink(decrypted_file)
except Exception:
pass
(Realized e.g. in :py:mod:`oletools.msodde`).
That means that caller code needs another wrapper around its main function. I
did try it another way first (a transparent on-demand unencrypt) but for the
above reasons I believe this is the better way. Also, non-top-level-code can
just assume that it works on unencrypted data and fail with an exception if
encrypted data makes its work impossible. No need to check `if is_encrypted()`
at the start of functions.
.. seealso:: [MS-OFFCRYPTO]
.. seealso:: https://github.com/nolze/msoffcrypto-tool
crypto is part of the python-oletools package:
http://www.decalage.info/python/oletools
"""
# === LICENSE =================================================================
# crypto is copyright (c) 2014-2019 Philippe Lagadec (http://www.decalage.info)
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
# -----------------------------------------------------------------------------
# CHANGELOG:
# 2019-02-14 v0.01 CH: - first version with encryption check from oleid
# 2019-04-01 v0.54 PL: - fixed bug in is_encrypted_ole
__version__ = '0.54'
import sys
import struct
import os
from os.path import splitext, isfile
from tempfile import mkstemp
import zipfile
import logging
from olefile import OleFileIO
try:
import msoffcrypto
except ImportError:
msoffcrypto = None
# IMPORTANT: it should be possible to run oletools directly as scripts
# in any directory without installing them with pip or setup.py.
# In that case, relative imports are NOT usable.
# And to enable Python 2+3 compatibility, we need to use absolute imports,
# so we add the oletools parent folder to sys.path (absolute+normalized path):
_thismodule_dir = os.path.normpath(os.path.abspath(os.path.dirname(__file__)))
_parent_dir = os.path.normpath(os.path.join(_thismodule_dir, '..'))
if _parent_dir not in sys.path:
sys.path.insert(0, _parent_dir)
from oletools.common.errors import CryptoErrorBase, WrongEncryptionPassword, \
UnsupportedEncryptionError, MaxCryptoNestingReached, CryptoLibNotImported
from oletools.common.log_helper import log_helper
#: Upper limit for the nesting of encrypted files (an encrypted file embedded
#: in an encrypted file embedded in ...). This module only defines the limit;
#: the routine sketched in the module docstring compares its own nesting
#: counter against it and raises MaxCryptoNestingReached once it is reached.
MAX_NESTING_DEPTH = 10
# === LOGGING =================================================================
# TODO: use log_helper instead
def get_logger(name, level=logging.CRITICAL+1):
    """
    Return the logger called `name`, prepared for use by this module.

    The root logger is left untouched so that log output of other modules does
    not suddenly show up on screen.

    A logger that is already registered under `name` is reused as it is (only
    its level is updated); attaching another handler to it would make every
    message appear twice. A logger that did not exist before gets a single
    :py:class:`logging.NullHandler`, so configuring real output remains the
    job of the application.

    :param str name: name of the logger
    :param int level: level to set on the logger; the default of CRITICAL+1
                      effectively disables all logging
    :returns: the (new or re-used) logger with `level` applied
    """
    # must be determined *before* getLogger(), which registers the name
    already_registered = name in logging.Logger.manager.loggerDict

    logger = logging.getLogger(name)
    if not already_registered:
        # brand-new logger: give it a do-nothing handler only
        logger.addHandler(logging.NullHandler())

    # in both cases make sure the requested level is in effect
    logger.setLevel(level)
    return logger
# Module-wide logger object used for debugging. It is created through
# get_logger() with that function's default level (CRITICAL+1), i.e. it is
# effectively silent until enable_logging() lowers the level.
log = get_logger('crypto')
def enable_logging():
    """
    Switch on logging for this module; it is off by default.

    The level of the module-specific logger is reset to NOTSET, which hands
    the decision about the actual logging level over to the main application.
    """
    defer_to_application = logging.NOTSET
    log.setLevel(defer_to_application)
def is_encrypted(some_file):
    """
    Determine whether document contains encrypted content.

    This should return False for documents that are just write-protected or
    signed or finalized. It should return True if ANY content of the file is
    encrypted and can therefore not be analyzed by other oletools modules
    without being given a password.

    Exception: there are ways to write-protect an office document by embedding
    it as encrypted stream with hard-coded standard password into an otherwise
    empty OLE file. From an office user point of view, this is no encryption,
    but regarding file structure this is encryption, so we return `True` for
    these.

    This should not raise exceptions needlessly.

    If :py:mod:`msoffcrypto` could be imported, it is asked first. If it is
    not available or fails, a rather simple fallback is used: it returns True
    if the file contains streams with typical encryption names (c.f.
    [MS-OFFCRYPTO]). It does not test whether these streams actually contain
    data or whether the ole file structure contains the necessary references
    to these. It also checks the "well-known property" PIDSI_DOC_SECURITY if
    the SummaryInformation stream is accessible (c.f. [MS-OLEPS] 2.25.1)

    :param some_file: File name or an opened OleFileIO
    :type some_file: :py:class:`olefile.OleFileIO` or `str`
    :returns: True if (and only if) the file contains encrypted content
    """
    log.debug('is_encrypted')

    # ask msoffcrypto if possible
    if check_msoffcrypto():
        log.debug('Checking for encryption using msoffcrypto')
        file_handle = None
        file_pos = None
        opened_here = False     # only close handles that we opened ourselves
        try:
            if isinstance(some_file, OleFileIO):
                # TODO: hacky, replace once msoffcrypto-tools accepts OleFileIO
                file_handle = some_file.fp
                file_pos = file_handle.tell()
                file_handle.seek(0)
            else:
                file_handle = open(some_file, 'rb')
                opened_here = True

            return msoffcrypto.OfficeFile(file_handle).is_encrypted()

        except Exception as exc:
            # file_handle is None if open() failed and in-memory streams have
            # no name, so fall back to the input argument for the message
            log.warning('msoffcrypto failed to interpret file {} or determine '
                        'whether it is encrypted: {}'
                        .format(getattr(file_handle, 'name', some_file), exc))

        finally:
            try:
                if file_pos is not None:   # input was OleFileIO
                    file_handle.seek(file_pos)
                elif opened_here:          # input was file name
                    file_handle.close()
                # else: nothing was opened, or the handle belongs to the
                # caller's OleFileIO and must stay open
            except Exception as exc:
                log.warning('Ignoring error during clean up: {}'.format(exc))

    # if that failed, try ourselves with older and less accurate code
    if isinstance(some_file, OleFileIO):
        return _is_encrypted_ole(some_file)
    if zipfile.is_zipfile(some_file):
        return _is_encrypted_zip(some_file)

    # otherwise assume it is the name of an ole file; we open it here, so we
    # are also responsible for closing it again
    ole = OleFileIO(some_file)
    try:
        return _is_encrypted_ole(ole)
    finally:
        ole.close()
def _is_encrypted_zip(filename):
"""Specialization of :py:func:`is_encrypted` for zip-based files."""
log.debug('is_encrypted_zip')
# TODO: distinguish OpenXML from normal zip files
# try to decrypt a few bytes from first entry
with zipfile.ZipFile(filename, 'r') as zipper:
first_entry = zipper.infolist()[0]
try:
with zipper.open(first_entry, 'r') as reader:
reader.read(min(16, first_entry.file_size))
return False
except RuntimeError as rt_err:
return 'crypt' in str(rt_err)
def _is_encrypted_ole(ole):
"""Specialization of :py:func:`is_encrypted` for ole files."""
log.debug('is_encrypted_ole')
# check well known property for password protection
# (this field may be missing for Powerpoint2000, for example)
# TODO: check whether password protection always implies encryption. Could
# write-protection or signing with password trigger this as well?
if ole.exists("\x05SummaryInformation"):
suminfo_data = ole.getproperties("\x05SummaryInformation")
if 0x13 in suminfo_data and (suminfo_data[0x13] & 1):
return True
# check a few stream names
# TODO: check whether these actually contain data and whether other
# necessary properties exist / are set
if ole.exists('EncryptionInfo'):
log.debug('found stream EncryptionInfo')
return True
# or an encrypted ppt file
if ole.exists('EncryptedSummary') and \
not ole.exists('SummaryInformation'):
return True
# Word-specific old encryption:
if ole.exists('WordDocument'):
# check for Word-specific encryption flag:
stream = None
try:
stream = ole.openstream(["WordDocument"])
# pass header 10 bytes
stream.read(10)
# read flag structure:
temp16 = struct.unpack("H", stream.read(2))[0]
f_encrypted = (temp16 & 0x0100) >> 8
if f_encrypted:
return True
except Exception:
raise
finally:
if stream is not None:
stream.close()
# no indication of encryption
return False
#: One way to achieve "write protection" in office files is to encrypt the
#: file using this password. :py:func:`decrypt` tries it when the caller does
#: not supply any password.
WRITE_PROTECT_ENCRYPTION_PASSWORD = 'VelvetSweatshop'
def _check_msoffcrypto():
    """
    Ensure that :py:mod:`msoffcrypto` is usable.

    :raises: :py:class:`CryptoLibNotImported` if the import of msoffcrypto at
             the top of this module did not succeed
    """
    if msoffcrypto is not None:
        return
    raise CryptoLibNotImported()
def check_msoffcrypto():
    """Tell whether :py:mod:`msoffcrypto` could be imported (`True`/`False`)."""
    library_missing = msoffcrypto is None
    return not library_missing
def decrypt(filename, passwords=None, **temp_file_args):
    """
    Try to decrypt an encrypted file.

    This function tries to decrypt the given file using a given set of
    passwords. If no password is given, tries the standard password for write
    protection. Creates a file with decrypted data whose file name is returned.
    If the decryption fails with every password, None is returned. The caller
    is responsible for deleting the returned file.

    :param str filename: path to an ole file on disc
    :param passwords: list/set/tuple/... of passwords or a single password or
                      None
    :type passwords: iterable or str or None
    :param temp_file_args: arguments for :py:func:`tempfile.mkstemp` e.g.,
                           `dir` or `prefix`. `suffix` will default to
                           suffix of input `filename`, `prefix` defaults to
                           `oletools-decrypt-`; `text` will be ignored
    :returns: name of the decrypted temporary file or None if no password
              worked
    :raises: :py:class:`CryptoLibNotImported` if :py:mod:`msoffcrypto` could
             not be imported
    :raises: :py:class:`UnsupportedEncryptionError` if msoffcrypto does not
             recognize the file format
    :raises: :py:class:`ValueError` if the given file is not encrypted
    """
    _check_msoffcrypto()

    # normalize password so we always have a list/tuple
    # (type(u'') is `unicode` on python 2 and `str` on python 3; without it a
    # single unicode password would be tried character by character)
    if isinstance(passwords, (str, type(u''))):
        passwords = (passwords, )
    elif not passwords:
        passwords = (WRITE_PROTECT_ENCRYPTION_PASSWORD, )

    # check temp file args
    temp_file_args.setdefault('prefix', 'oletools-decrypt-')
    temp_file_args.setdefault('suffix', splitext(filename)[1])
    temp_file_args['text'] = False      # decrypted data is always binary

    decrypt_file = None
    with open(filename, 'rb') as reader:
        unsupported = False
        try:
            crypto_file = msoffcrypto.OfficeFile(reader)
        except Exception as exc:   # e.g. ppt, not yet supported by msoffcrypto
            if 'Unrecognized file format' not in str(exc):
                raise
            unsupported = True
        if unsupported:
            # Raised after the except block has been left, so the new
            # exception does not drag along the stack trace of the original
            # one. Works the same way in python 2 and 3, no exec() needed.
            raise UnsupportedEncryptionError(filename)

        if not crypto_file.is_encrypted():
            raise ValueError('Given input file {} is not encrypted!'
                             .format(filename))

        for password in passwords:
            write_descriptor = None
            write_handle = None
            decrypt_file = None
            try:
                crypto_file.load_key(password=password)

                # create temp file
                write_descriptor, decrypt_file = mkstemp(**temp_file_args)
                write_handle = os.fdopen(write_descriptor, 'wb')
                write_descriptor = None   # is now handled via write_handle
                crypto_file.decrypt(write_handle)

                # decryption was successful; clean up and return
                write_handle.close()
                write_handle = None
                break
            except Exception as exc:
                # Most likely a wrong password, but could be anything (e.g.
                # temp file could not be written). Leave a trace for debugging
                # without revealing the password itself.
                log.debug('Decryption attempt of {} failed: {}'
                          .format(filename, exc))

                # error-clean up: close everything and del temp file
                # (compare with None since 0 is a valid file descriptor)
                if write_handle is not None:
                    write_handle.close()
                elif write_descriptor is not None:
                    os.close(write_descriptor)
                if decrypt_file and isfile(decrypt_file):
                    os.unlink(decrypt_file)
                decrypt_file = None

    # name of the decrypted file if one password worked; None if all
    # passwords were tried without success
    return decrypt_file