Commit 40694d71ff364a46bb21aeec14a66f1007902275

Authored by Philippe Lagadec
Committed by GitHub
2 parents ec182bcb c7a708db

Merge pull request #441 from christian-intra2net/is-encrypted-by-msoffcrypto

Is encrypted by msoffcrypto
oletools/crypto.py
... ... @@ -168,6 +168,7 @@ def enable_logging():
168 168 """
169 169 log.setLevel(logging.NOTSET)
170 170  
  171 +
171 172 def is_encrypted(some_file):
172 173 """
173 174 Determine whether document contains encrypted content.
... ... @@ -197,17 +198,55 @@ def is_encrypted(some_file):
197 198 :returns: True if (and only if) the file contains encrypted content
198 199 """
199 200 log.debug('is_encrypted')
200   - if isinstance(some_file, OleFileIO):
201   - return is_encrypted_ole(some_file) # assume it is OleFileIO
202   - if zipfile.is_zipfile(some_file):
203   - return is_encrypted_zip(some_file)
204   - # otherwise assume it is the name of an ole file
205   - return is_encrypted_ole(OleFileIO(some_file))
206 201  
  202 + # ask msoffcrypto if possible
  203 + if check_msoffcrypto():
  204 + log.debug('Checking for encryption using msoffcrypto')
  205 + file_handle = None
  206 + file_pos = None
  207 + try:
  208 + if isinstance(some_file, OleFileIO):
  209 + # TODO: hacky, replace once msoffcrypto-tools accepts OleFileIO
  210 + file_handle = some_file.fp
  211 + file_pos = file_handle.tell()
  212 + file_handle.seek(0)
  213 + else:
  214 + file_handle = open(some_file, 'rb')
  215 +
  216 + return msoffcrypto.OfficeFile(file_handle).is_encrypted()
  217 +
  218 + except Exception as exc:
  219 + log.warning('msoffcrypto failed to interpret file {} or determine '
  220 + 'whether it is encrypted: {}'
  221 + .format(file_handle.name, exc))
  222 +
  223 + finally:
  224 + try:
  225 + if file_pos is not None: # input was OleFileIO
  226 + file_handle.seek(file_pos)
  227 + else: # input was file name
  228 + file_handle.close()
  229 + except Exception as exc:
  230 + log.warning('Ignoring error during clean up: {}'.format(exc))
  231 +
  232 + # if that failed, try ourselves with older and less accurate code
  233 + try:
  234 + if isinstance(some_file, OleFileIO):
  235 + return _is_encrypted_ole(some_file)
  236 + if zipfile.is_zipfile(some_file):
  237 + return _is_encrypted_zip(some_file)
  238 + # otherwise assume it is the name of an ole file
  239 + return _is_encrypted_ole(OleFileIO(some_file))
  240 + except Exception as exc:
  241 + log.warning('Failed to check {} for encryption ({}); assume it is not '
  242 + 'encrypted.'.format(some_file, exc))
207 243  
208   -def is_encrypted_zip(filename):
  244 + return False
  245 +
  246 +
  247 +def _is_encrypted_zip(filename):
209 248 """Specialization of :py:func:`is_encrypted` for zip-based files."""
210   - log.debug('is_encrypted_zip')
  249 + log.debug('Checking for encryption in zip file')
211 250 # TODO: distinguish OpenXML from normal zip files
212 251 # try to decrypt a few bytes from first entry
213 252 with zipfile.ZipFile(filename, 'r') as zipper:
... ... @@ -220,9 +259,9 @@ def is_encrypted_zip(filename):
220 259 return 'crypt' in str(rt_err)
221 260  
222 261  
223   -def is_encrypted_ole(ole):
  262 +def _is_encrypted_ole(ole):
224 263 """Specialization of :py:func:`is_encrypted` for ole files."""
225   - log.debug('is_encrypted_ole')
  264 + log.debug('Checking for encryption in OLE file')
226 265 # check well known property for password protection
227 266 # (this field may be missing for Powerpoint2000, for example)
228 267 # TODO: check whether password protection always implies encryption. Could
... ... @@ -256,8 +295,6 @@ def is_encrypted_ole(ole):
256 295 f_encrypted = (temp16 & 0x0100) >> 8
257 296 if f_encrypted:
258 297 return True
259   - except Exception:
260   - raise
261 298 finally:
262 299 if stream is not None:
263 300 stream.close()
... ... @@ -324,6 +361,8 @@ def decrypt(filename, passwords=None, **temp_file_args):
324 361 crypto_file = msoffcrypto.OfficeFile(reader)
325 362 except Exception as exc: # e.g. ppt, not yet supported by msoffcrypto
326 363 if 'Unrecognized file format' in str(exc):
  364 + log.debug('Caught exception', exc_info=True)
  365 +
327 366 # raise different exception without stack trace of original exc
328 367 if sys.version_info.major == 2:
329 368 raise UnsupportedEncryptionError(filename)
... ... @@ -337,6 +376,7 @@ def decrypt(filename, passwords=None, **temp_file_args):
337 376 .format(filename))
338 377  
339 378 for password in passwords:
  379 + log.debug('Trying to decrypt with password {!r}'.format(password))
340 380 write_descriptor = None
341 381 write_handle = None
342 382 decrypt_file = None
... ... @@ -354,6 +394,8 @@ def decrypt(filename, passwords=None, **temp_file_args):
354 394 write_handle = None
355 395 break
356 396 except Exception:
  397 + log.debug('Failed to decrypt', exc_info=True)
  398 +
357 399 # error-clean up: close everything and del temp file
358 400 if write_handle:
359 401 write_handle.close()
... ... @@ -363,4 +405,5 @@ def decrypt(filename, passwords=None, **temp_file_args):
363 405 os.unlink(decrypt_file)
364 406 decrypt_file = None
365 407 # if we reach this, all passwords were tried without success
  408 + log.debug('All passwords failed')
366 409 return decrypt_file
... ...
oletools/msodde.py
... ... @@ -987,6 +987,9 @@ def process_maybe_encrypted(filepath, passwords=None, crypto_nesting=0,
987 987 try:
988 988 logger.debug('Trying to decrypt file')
989 989 decrypted_file = crypto.decrypt(filepath, passwords)
  990 + if not decrypted_file:
  991 + logger.error('Decrypt failed, run with debug output to get details')
  992 + raise crypto.WrongEncryptionPassword(filepath)
990 993 logger.info('Analyze decrypted file')
991 994 result = process_maybe_encrypted(decrypted_file, passwords,
992 995 crypto_nesting+1, **kwargs)
... ...
oletools/olevba.py
... ... @@ -3893,6 +3893,7 @@ def process_file(filename, data, container, options, crypto_nesting=0):
3893 3893 [crypto.WRITE_PROTECT_ENCRYPTION_PASSWORD, ]
3894 3894 decrypted_file = crypto.decrypt(filename, passwords)
3895 3895 if not decrypted_file:
  3896 + log.error('Decrypt failed, run with debug output to get details')
3896 3897 raise crypto.WrongEncryptionPassword(filename)
3897 3898 log.info('Working on decrypted file')
3898 3899 return process_file(decrypted_file, data, container or filename,
... ...
tests/common/log_helper/log_helper_test_main.py
... ... @@ -34,12 +34,16 @@ def init_logging_and_log(args):
34 34 level = args[-1]
35 35 use_json = 'as-json' in args
36 36 throw = 'throw' in args
  37 + percent_autoformat = '%-autoformat' in args
37 38  
38 39 if 'enable' in args:
39 40 log_helper.enable_logging(use_json, level, stream=sys.stdout)
40 41  
41 42 _log()
42 43  
  44 + if percent_autoformat:
  45 + logger.info('The %s is %d.', 'answer', 47)
  46 +
43 47 if throw:
44 48 raise Exception('An exception occurred before ending the logging')
45 49  
... ...
tests/common/log_helper/test_log_helper.py
... ... @@ -112,6 +112,11 @@ class TestLogHelper(unittest.TestCase):
112 112 ]
113 113 self.assertEqual(jout, jexpect)
114 114  
  115 + def test_percent_autoformat(self):
  116 + """Test that auto-formatting of log strings with `%` works."""
  117 + output = self._run_test(['enable', '%-autoformat', 'info'])
  118 + self.assertIn('The answer is 47.', output)
  119 +
115 120 def test_json_correct_on_exceptions(self):
116 121 """
117 122 Test that even on unhandled exceptions our JSON is always correct
... ...
tests/msodde/test_crypto.py
... ... @@ -2,7 +2,7 @@
2 2  
3 3 import sys
4 4 import unittest
5   -from os.path import join as pjoin
  5 +from os.path import basename, join as pjoin
6 6  
7 7 from tests.test_utils import DATA_BASE_DIR
8 8  
... ... @@ -11,8 +11,8 @@ from oletools import msodde
11 11  
12 12  
13 13 @unittest.skipIf(not crypto.check_msoffcrypto(),
14   - 'Module msoffcrypto not installed for python{}.{}'
15   - .format(sys.version_info.major, sys.version_info.minor))
  14 + 'Module msoffcrypto not installed for {}'
  15 + .format(basename(sys.executable)))
16 16 class MsoddeCryptoTest(unittest.TestCase):
17 17 """Test integration of decryption in msodde."""
18 18 def test_standard_password(self):
... ...
tests/oleid/test_basic.py
... ... @@ -20,7 +20,7 @@ class TestOleIDBasic(unittest.TestCase):
20 20 """Run all files in test-data through oleid and compare to known output"""
21 21 # this relies on order of indicators being constant, could relax that
22 22 # Also requires that files have the correct suffixes (no rtf in doc)
23   - NON_OLE_SUFFIXES = ('.xml', '.csv', '.rtf', '')
  23 + NON_OLE_SUFFIXES = ('.xml', '.csv', '.rtf', '', '.odt', '.ods', '.odp')
24 24 NON_OLE_VALUES = (False, )
25 25 WORD = b'Microsoft Office Word'
26 26 PPT = b'Microsoft Office PowerPoint'
... ... @@ -121,6 +121,33 @@ class TestOleIDBasic(unittest.TestCase):
121 121 'msodde/harmless-clean.docx': (False,),
122 122 'oleform/oleform-PR314.docm': (False,),
123 123 'basic/encrypted.docx': CRYPT,
  124 + 'oleobj/external_link/sample_with_external_link_to_doc.docx': (False,),
  125 + 'oleobj/external_link/sample_with_external_link_to_doc.xlsb': (False,),
  126 + 'oleobj/external_link/sample_with_external_link_to_doc.dotm': (False,),
  127 + 'oleobj/external_link/sample_with_external_link_to_doc.xlsm': (False,),
  128 + 'oleobj/external_link/sample_with_external_link_to_doc.pptx': (False,),
  129 + 'oleobj/external_link/sample_with_external_link_to_doc.dotx': (False,),
  130 + 'oleobj/external_link/sample_with_external_link_to_doc.docm': (False,),
  131 + 'oleobj/external_link/sample_with_external_link_to_doc.potm': (False,),
  132 + 'oleobj/external_link/sample_with_external_link_to_doc.xlsx': (False,),
  133 + 'oleobj/external_link/sample_with_external_link_to_doc.potx': (False,),
  134 + 'oleobj/external_link/sample_with_external_link_to_doc.ppsm': (False,),
  135 + 'oleobj/external_link/sample_with_external_link_to_doc.pptm': (False,),
  136 + 'oleobj/external_link/sample_with_external_link_to_doc.ppsx': (False,),
  137 + 'encrypted/autostart-encrypt-standardpassword.xlsm':
  138 + (True, False, 'unknown', True, False, False, False, False, False, False, 0),
  139 + 'encrypted/autostart-encrypt-standardpassword.xls':
  140 + (True, True, EXCEL, True, False, True, True, False, False, False, 0),
  141 + 'encrypted/dde-test-encrypt-standardpassword.xlsx':
  142 + (True, False, 'unknown', True, False, False, False, False, False, False, 0),
  143 + 'encrypted/dde-test-encrypt-standardpassword.xlsm':
  144 + (True, False, 'unknown', True, False, False, False, False, False, False, 0),
  145 + 'encrypted/autostart-encrypt-standardpassword.xlsb':
  146 + (True, False, 'unknown', True, False, False, False, False, False, False, 0),
  147 + 'encrypted/dde-test-encrypt-standardpassword.xls':
  148 + (True, True, EXCEL, True, False, False, True, False, False, False, 0),
  149 + 'encrypted/dde-test-encrypt-standardpassword.xlsb':
  150 + (True, False, 'unknown', True, False, False, False, False, False, False, 0),
124 151 }
125 152  
126 153 indicator_names = []
... ... @@ -148,7 +175,8 @@ class TestOleIDBasic(unittest.TestCase):
148 175 OLE_VALUES[name]))
149 176 except KeyError:
150 177 print('Should add oleid output for {} to {} ({})'
151   - .format(name, __name__, values[3:]))
  178 + .format(name, __name__, values))
  179 +
152 180  
153 181 # just in case somebody calls this file as a script
154 182 if __name__ == '__main__':
... ...
tests/olevba/test_basic.py
... ... @@ -3,21 +3,71 @@ Test basic functionality of olevba[3]
3 3 """
4 4  
5 5 import unittest
6   -import sys
7   -if sys.version_info.major <= 2:
8   - from oletools import olevba
9   -else:
10   - from oletools import olevba3 as olevba
11 6 import os
12 7 from os.path import join
  8 +import re
13 9  
14 10 # Directory with test data, independent of current working directory
15   -from tests.test_utils import DATA_BASE_DIR
  11 +from tests.test_utils import DATA_BASE_DIR, call_and_capture
16 12  
17 13  
18 14 class TestOlevbaBasic(unittest.TestCase):
19 15 """Tests olevba basic functionality"""
20 16  
  17 + def test_text_behaviour(self):
  18 + """Test behaviour of olevba when presented with pure text file."""
  19 + self.do_test_behaviour('text')
  20 +
  21 + def test_empty_behaviour(self):
  22 + """Test behaviour of olevba when presented with an empty file."""
  23 + self.do_test_behaviour('empty')
  24 +
  25 + def do_test_behaviour(self, filename):
  26 + """Helper for test_{text,empty}_behaviour."""
  27 + input_file = join(DATA_BASE_DIR, 'basic', filename)
  28 + output, _ = call_and_capture('olevba', args=(input_file, ))
  29 +
  30 + # check output
  31 + self.assertTrue(re.search(r'^Type:\s+Text\s*$', output, re.MULTILINE),
  32 + msg='"Type: Text" not found in output:\n' + output)
  33 + self.assertTrue(re.search(r'^No suspicious .+ found.$', output,
  34 + re.MULTILINE),
  35 + msg='"No suspicous...found" not found in output:\n' + \
  36 + output)
  37 + self.assertNotIn('error', output.lower())
  38 +
  39 + # check warnings
  40 + for line in output.splitlines():
  41 + if line.startswith('WARNING ') and 'encrypted' in line:
  42 + continue # encryption warnings are ok
  43 + elif 'warn' in line.lower():
  44 + raise self.fail('Found "warn" in output line: "{}"'
  45 + .format(line.rstrip()))
  46 + self.assertIn('not encrypted', output)
  47 +
  48 + def test_rtf_behaviour(self):
  49 + """Test behaviour of olevba when presented with an rtf file."""
  50 + input_file = join(DATA_BASE_DIR, 'msodde', 'RTF-Spec-1.7.rtf')
  51 + output, ret_code = call_and_capture('olevba', args=(input_file, ),
  52 + accept_nonzero_exit=True)
  53 +
  54 + # check that return code is olevba.RETURN_OPEN_ERROR
  55 + self.assertEqual(ret_code, 5)
  56 +
  57 + # check output:
  58 + self.assertIn('FileOpenError', output)
  59 + self.assertIn('is RTF', output)
  60 + self.assertIn('rtfobj.py', output)
  61 + self.assertIn('not encrypted', output)
  62 +
  63 + # check warnings
  64 + for line in output.splitlines():
  65 + if line.startswith('WARNING ') and 'encrypted' in line:
  66 + continue # encryption warnings are ok
  67 + elif 'warn' in line.lower():
  68 + raise self.fail('Found "warn" in output line: "{}"'
  69 + .format(line.rstrip()))
  70 +
21 71 def test_crypt_return(self):
22 72 """
23 73 Tests that encrypted files give a certain return code.
... ... @@ -28,23 +78,23 @@ class TestOlevbaBasic(unittest.TestCase):
28 78 CRYPT_DIR = join(DATA_BASE_DIR, 'encrypted')
29 79 CRYPT_RETURN_CODE = 9
30 80 ADD_ARGS = [], ['-d', ], ['-a', ], ['-j', ], ['-t', ]
31   - EXCEPTIONS = ['autostart-encrypt-standardpassword.xlsm', # These ...
32   - 'autostart-encrypt-standardpassword.xlsb', # files ...
33   - 'dde-test-encrypt-standardpassword.xls', # are ...
34   - 'dde-test-encrypt-standardpassword.xlsx', # decrypted
35   - 'dde-test-encrypt-standardpassword.xlsm', # per ...
36   - 'dde-test-encrypt-standardpassword.xlsb'] # default.
  81 + EXCEPTIONS = ['autostart-encrypt-standardpassword.xls', # These ...
  82 + 'autostart-encrypt-standardpassword.xlsm', # files ...
  83 + 'autostart-encrypt-standardpassword.xlsb', # are ...
  84 + 'dde-test-encrypt-standardpassword.xls', # automati...
  85 + 'dde-test-encrypt-standardpassword.xlsx', # ...cally...
  86 + 'dde-test-encrypt-standardpassword.xlsm', # decrypted.
  87 + 'dde-test-encrypt-standardpassword.xlsb']
37 88 for filename in os.listdir(CRYPT_DIR):
38 89 if filename in EXCEPTIONS:
39 90 continue
40 91 full_name = join(CRYPT_DIR, filename)
41 92 for args in ADD_ARGS:
42   - try:
43   - ret_code = olevba.main(args + [full_name, ])
44   - except SystemExit as se:
45   - ret_code = se.code or 0 # se.code can be None
  93 + _, ret_code = call_and_capture('olevba',
  94 + args=[full_name, ] + args,
  95 + accept_nonzero_exit=True)
46 96 self.assertEqual(ret_code, CRYPT_RETURN_CODE,
47   - msg='Wrong return code {} for args {}'
  97 + msg='Wrong return code {} for args {}'\
48 98 .format(ret_code, args + [filename, ]))
49 99  
50 100  
... ...
tests/olevba/test_crypto.py
... ... @@ -2,20 +2,18 @@
2 2  
3 3 import sys
4 4 import unittest
5   -import os
6   -from os.path import join as pjoin
7   -from subprocess import check_output, CalledProcessError
  5 +from os.path import basename, join as pjoin
8 6 import json
9 7 from collections import OrderedDict
10 8  
11   -from tests.test_utils import DATA_BASE_DIR, SOURCE_BASE_DIR
  9 +from tests.test_utils import DATA_BASE_DIR, call_and_capture
12 10  
13 11 from oletools import crypto
14 12  
15 13  
16 14 @unittest.skipIf(not crypto.check_msoffcrypto(),
17   - 'Module msoffcrypto not installed for python{}.{}'
18   - .format(sys.version_info.major, sys.version_info.minor))
  15 + 'Module msoffcrypto not installed for {}'
  16 + .format(basename(sys.executable)))
19 17 class OlevbaCryptoWriteProtectTest(unittest.TestCase):
20 18 """
21 19 Test documents that are 'write-protected' through encryption.
... ... @@ -34,25 +32,12 @@ class OlevbaCryptoWriteProtectTest(unittest.TestCase):
34 32 """
35 33 def test_autostart(self):
36 34 """Check that autostart macro is found in xls[mb] sample file."""
37   - # create a PYTHONPATH environment var to prefer our olevba
38   - env = os.environ
39   - try:
40   - env['PYTHONPATH'] = SOURCE_BASE_DIR + os.pathsep + \
41   - os.environ['PYTHONPATH']
42   - except KeyError:
43   - env['PYTHONPATH'] = SOURCE_BASE_DIR
44   -
45 35 for suffix in 'xlsm', 'xlsb':
46 36 example_file = pjoin(
47 37 DATA_BASE_DIR, 'encrypted',
48 38 'autostart-encrypt-standardpassword.' + suffix)
49   - try:
50   - output = check_output([sys.executable, '-m', 'olevba', '-j',
51   - example_file],
52   - universal_newlines=True, env=env)
53   - except CalledProcessError as err:
54   - print(err.output)
55   - raise
  39 + output, _ = call_and_capture('olevba', args=('-j', example_file),
  40 + exclude_stderr=True)
56 41 data = json.loads(output, object_pairs_hook=OrderedDict)
57 42 # debug: json.dump(data, sys.stdout, indent=4)
58 43 self.assertEqual(len(data), 4)
... ...
tests/test-data/encrypted/autostart-encrypt-standardpassword.xls 0 → 100755
No preview for this file type
tests/test_utils/__init__.py
1   -from os.path import dirname, join, abspath
2   -
3   -# Base dir of project, contains subdirs "tests" and "oletools" and README.md
4   -PROJECT_ROOT = dirname(dirname(dirname(abspath(__file__))))
5   -
6   -# Directory with test data, independent of current working directory
7   -DATA_BASE_DIR = join(PROJECT_ROOT, 'tests', 'test-data')
8   -
9   -# Directory with source code
10   -SOURCE_BASE_DIR = join(PROJECT_ROOT, 'oletools')
  1 +from .utils import *
... ...
tests/test_utils/utils.py 0 → 100644
  1 +#!/usr/bin/env python3
  2 +
  3 +"""Utils generally useful for unittests."""
  4 +
  5 +import sys
  6 +import os
  7 +from os.path import dirname, join, abspath
  8 +from subprocess import check_output, PIPE, STDOUT, CalledProcessError
  9 +
  10 +
  11 +# Base dir of project, contains subdirs "tests" and "oletools" and README.md
  12 +PROJECT_ROOT = dirname(dirname(dirname(abspath(__file__))))
  13 +
  14 +# Directory with test data, independent of current working directory
  15 +DATA_BASE_DIR = join(PROJECT_ROOT, 'tests', 'test-data')
  16 +
  17 +# Directory with source code
  18 +SOURCE_BASE_DIR = join(PROJECT_ROOT, 'oletools')
  19 +
  20 +
  21 +def call_and_capture(module, args=None, accept_nonzero_exit=False,
  22 + exclude_stderr=False):
  23 + """
  24 + Run module as script, capturing and returning output and return code.
  25 +
  26 + This is the best way to capture a module's stdout and stderr; trying to
  27 + modify sys.stdout/sys.stderr to StringIO-Buffers frequently causes trouble.
  28 +
  29 + Only drawback so far: stdout and stderr are merged into one (which is
  30 + what users see on their shell as well). When testing for json-compatible
  31 + output you should set `exclude_stderr` to `True` since logging ignores
  32 + stderr, so unforeseen warnings (e.g. issued by pypy) would mess up your json.
  33 +
  34 + :param str module: name of module to test, e.g. `olevba`
  35 + :param args: arguments for module's main function
  36 + :param bool accept_nonzero_exit: Return (not raise) on non-0 return code
  37 + :param bool exclude_stderr: Exclude output to `sys.stderr` from output
  38 + (e.g. if parsing output through json)
  39 + :returns: output, ret_code
  40 + :rtype: str, int
  41 + """
  42 + # create a PYTHONPATH environment var to prefer our current code
  43 + env = os.environ.copy()
  44 + try:
  45 + env['PYTHONPATH'] = SOURCE_BASE_DIR + os.pathsep + \
  46 + os.environ['PYTHONPATH']
  47 + except KeyError:
  48 + env['PYTHONPATH'] = SOURCE_BASE_DIR
  49 +
  50 + # ensure args is a tuple
  51 + my_args = tuple(args) if args else ()
  52 +
  53 + ret_code = -1
  54 + try:
  55 + output = check_output((sys.executable, '-m', module) + my_args,
  56 + universal_newlines=True, env=env,
  57 + stderr=PIPE if exclude_stderr else STDOUT)
  58 + ret_code = 0
  59 +
  60 + except CalledProcessError as err:
  61 + if accept_nonzero_exit:
  62 + ret_code = err.returncode
  63 + output = err.output
  64 + else:
  65 + print(err.output)
  66 + raise
  67 +
  68 + return output, ret_code
... ...