Commit c422cdf76d14012144d605fe7ff991ea0f7a06a9

Authored by Christian Herdtweck
1 parent ab2a70e9

tests: Re-enable samples skipped because of #215

Some samples triggered antivirus engines. Issues #215 and #217 ended with
the agreement to encapsulate problematic samples in encrypted zip
containers and decrypt them on the fly. Initial support for this was added,
but it did not cover 5 tests. Create on-the-fly decryption for these
tests as well and re-enable them.
tests/msodde/test_basic.py
... ... @@ -12,13 +12,11 @@ import unittest
12 12 import sys
13 13 import os
14 14 from os.path import join, basename
15   -from traceback import print_exc
16   -import json
17   -from collections import OrderedDict
18 15 from oletools import msodde
19 16 from oletools.crypto import \
20 17 WrongEncryptionPassword, CryptoLibNotImported, check_msoffcrypto
21   -from tests.test_utils import call_and_capture, DATA_BASE_DIR as BASE_DIR
  18 +from tests.test_utils import call_and_capture, decrypt_sample,\
  19 + DATA_BASE_DIR as BASE_DIR
22 20  
23 21  
24 22 class TestReturnCode(unittest.TestCase):
... ... @@ -26,14 +24,13 @@ class TestReturnCode(unittest.TestCase):
26 24 def test_valid_doc(self):
27 25 """ check that a valid doc file leads to 0 exit status """
28 26 for filename in (
29   - 'harmless-clean',
30   - # TODO: TEMPORARILY DISABLED UNTIL ISSUE #215 IS FIXED:
31   - # 'dde-test-from-office2003',
32   - # 'dde-test-from-office2016',
33   - # 'dde-test-from-office2013-utf_16le-korean'
  27 + 'harmless-clean.doc',
  28 + 'dde-test-from-office2003.doc.zip',
  29 + 'dde-test-from-office2016.doc.zip',
  30 + 'dde-test-from-office2013-utf_16le-korean.doc.zip',
34 31 ):
35   - self.do_test_validity(join(BASE_DIR, 'msodde',
36   - filename + '.doc'))
  32 + with decrypt_sample(join('msodde', filename)) as temp_name:
  33 + self.do_test_validity(temp_name)
37 34  
38 35 def test_valid_docx(self):
39 36 """ check that a valid docx file leads to 0 exit status """
... ... @@ -52,11 +49,11 @@ class TestReturnCode(unittest.TestCase):
52 49 for filename in (
53 50 'harmless-clean-2003.xml',
54 51 'dde-in-excel2003.xml',
55   - # TODO: TEMPORARILY DISABLED UNTIL ISSUE #215 IS FIXED:
56   - # 'dde-in-word2003.xml',
57   - # 'dde-in-word2007.xml'
  52 + 'dde-in-word2003.xml.zip',
  53 + 'dde-in-word2007.xml.zip'
58 54 ):
59   - self.do_test_validity(join(BASE_DIR, 'msodde', filename))
  55 + with decrypt_sample(join('msodde', filename)) as temp_name:
  56 + self.do_test_validity(temp_name)
60 57  
61 58 def test_invalid_none(self):
62 59 """ check that no file argument leads to non-zero exit status """
... ... @@ -99,13 +96,11 @@ class TestReturnCode(unittest.TestCase):
99 96 def do_test_validity(self, filename, expect_error=None):
100 97 """ helper for test_[in]valid_* """
101 98 found_error = None
102   - # DEBUG: print('Testing file {}'.format(filename))
103 99 try:
104 100 msodde.process_maybe_encrypted(filename,
105   - field_filter_mode=msodde.FIELD_FILTER_BLACKLIST)
  101 + field_filter_mode=msodde.FIELD_FILTER_BLACKLIST)
106 102 except Exception as exc:
107 103 found_error = exc
108   - # DEBUG: print_exc()
109 104  
110 105 if expect_error and not found_error:
111 106 self.fail('Expected {} but msodde finished without errors for {}'
... ... @@ -145,15 +140,14 @@ class TestDdeLinks(unittest.TestCase):
145 140 """
146 141 return [o for o in output.splitlines()]
147 142  
148   - # TODO: TEMPORARILY DISABLED UNTIL ISSUE #215 IS FIXED:
149   - # def test_with_dde(self):
150   - # """ check that dde links appear on stdout """
151   - # filename = 'dde-test-from-office2003.doc'
152   - # output = msodde.process_maybe_encrypted(
153   - # join(BASE_DIR, 'msodde', filename),
154   - # field_filter_mode=msodde.FIELD_FILTER_BLACKLIST)
155   - # self.assertNotEqual(len(self.get_dde_from_output(output)), 0,
156   - # msg='Found no dde links in output of ' + filename)
  143 + def test_with_dde(self):
  144 + """ check that dde links appear on stdout """
  145 + filename = 'dde-test-from-office2003.doc.zip'
  146 + with decrypt_sample(join('msodde', filename)) as temp_file:
  147 + output = msodde.process_maybe_encrypted(temp_file,
  148 + field_filter_mode=msodde.FIELD_FILTER_BLACKLIST)
  149 + self.assertNotEqual(len(self.get_dde_from_output(output)), 0,
  150 + msg='Found no dde links in output of ' + filename)
157 151  
158 152 def test_no_dde(self):
159 153 """ check that no dde links appear on stdout """
... ... @@ -164,15 +158,14 @@ class TestDdeLinks(unittest.TestCase):
164 158 self.assertEqual(len(self.get_dde_from_output(output)), 0,
165 159 msg='Found dde links in output of ' + filename)
166 160  
167   - # TODO: TEMPORARILY DISABLED UNTIL ISSUE #215 IS FIXED:
168   - # def test_with_dde_utf16le(self):
169   - # """ check that dde links appear on stdout """
170   - # filename = 'dde-test-from-office2013-utf_16le-korean.doc'
171   - # output = msodde.process_maybe_encrypted(
172   - # join(BASE_DIR, 'msodde', filename),
173   - # field_filter_mode=msodde.FIELD_FILTER_BLACKLIST)
174   - # self.assertNotEqual(len(self.get_dde_from_output(output)), 0,
175   - # msg='Found no dde links in output of ' + filename)
  161 + def test_with_dde_utf16le(self):
  162 + """ check that dde links appear on stdout """
  163 + filename = 'dde-test-from-office2013-utf_16le-korean.doc.zip'
  164 + with decrypt_sample(join('msodde', filename)) as temp_file:
  165 + output = msodde.process_maybe_encrypted(temp_file,
  166 + field_filter_mode=msodde.FIELD_FILTER_BLACKLIST)
  167 + self.assertNotEqual(len(self.get_dde_from_output(output)), 0,
  168 + msg='Found no dde links in output of ' + filename)
176 169  
177 170 def test_excel(self):
178 171 """ check that dde links are found in excel 2007+ files """
... ... @@ -188,19 +181,19 @@ class TestDdeLinks(unittest.TestCase):
188 181  
189 182 def test_xml(self):
190 183 """ check that dde in xml from word / excel is found """
191   - # TODO: TEMPORARILY DISABLED UNTIL ISSUE #215 IS FIXED:
192   - for name_part in ('excel2003',): #, 'word2003', 'word2007':
193   - filename = 'dde-in-' + name_part + '.xml'
194   - output = msodde.process_maybe_encrypted(
195   - join(BASE_DIR, 'msodde', filename),
196   - field_filter_mode=msodde.FIELD_FILTER_BLACKLIST)
197   - links = self.get_dde_from_output(output)
198   - self.assertEqual(len(links), 1, 'found {0} dde-links in {1}'
199   - .format(len(links), filename))
200   - self.assertTrue('cmd' in links[0], 'no "cmd" in dde-link for {0}'
201   - .format(filename))
202   - self.assertTrue('calc' in links[0], 'no "calc" in dde-link for {0}'
203   - .format(filename))
  184 + for filename in ('dde-in-excel2003.xml',
  185 + 'dde-in-word2003.xml.zip',
  186 + 'dde-in-word2007.xml.zip'):
  187 + with decrypt_sample(join('msodde', filename)) as temp_file:
  188 + output = msodde.process_maybe_encrypted(temp_file,
  189 + field_filter_mode=msodde.FIELD_FILTER_BLACKLIST)
  190 + links = self.get_dde_from_output(output)
  191 + self.assertEqual(len(links), 1, 'found {0} dde-links in {1}'
  192 + .format(len(links), filename))
  193 + self.assertTrue('cmd' in links[0], 'no "cmd" in dde-link for {0}'
  194 + .format(filename))
  195 + self.assertTrue('calc' in links[0], 'no "calc" in dde-link for {0}'
  196 + .format(filename))
204 197  
205 198 def test_clean_rtf_blacklist(self):
206 199 """ find a lot of hyperlinks in rtf spec """
... ...
tests/test_utils/__init__.py
1 1 from .utils import *
  2 +from .testdata_reader import *
2 3 \ No newline at end of file
... ...
tests/test_utils/testdata_reader.py
... ... @@ -7,7 +7,10 @@ using them.
7 7 """
8 8  
9 9 import os, sys, zipfile
10   -from os.path import dirname, abspath, normpath, relpath, join, basename
  10 +from os.path import relpath, join, isfile
  11 +from contextlib import contextmanager
  12 +from tempfile import mkstemp
  13 +
11 14 from . import DATA_BASE_DIR
12 15  
13 16 # Password used to encrypt problematic test samples inside a zip container
... ... @@ -82,3 +85,42 @@ def loop_over_files(subdir=''):
82 85 yield relative_path, read_encrypted(relative_path)
83 86 else:
84 87 yield relative_path, read(relative_path)
  88 +
  89 +
  90 +@contextmanager
  91 +def decrypt_sample(relpath):
  92 + """
  93 + Decrypt test sample, save to tempfile, yield temp file name.
  94 +
  95 + Use as context-manager, deletes tempfile after use.
  96 +
  97 + If sample is not encrypted at all (filename does not end in '.zip'),
  98 + yields absolute path to sample itself, so can apply this code also
  99 + to non-encrypted samples.
  100 +
  101 + Code based on test_encoding_handler.temp_file().
  102 +
  103 + :param relpath: path inside `DATA_BASE_DIR`, should end in '.zip'
  104 + :return: absolute path name to decrypted sample.
  105 + """
  106 + if not relpath.endswith('.zip'):
  107 + yield get_path_from_root(relpath)
  108 + else:
  109 + tmp_descriptor = None
  110 + tmp_name = None
  111 + try:
  112 + tmp_descriptor, tmp_name = mkstemp(text=False)
  113 + with zipfile.ZipFile(get_path_from_root(relpath), 'r') as unzipper:
  114 + # no need to iterate over blobs, our test files are all small
  115 + os.write(tmp_descriptor, unzipper.read(unzipper.namelist()[0],
  116 + pwd=ENCRYPTED_FILES_PASSWORD))
  117 + os.close(tmp_descriptor)
  118 + tmp_descriptor = None
  119 + yield tmp_name
  120 + except Exception:
  121 + raise
  122 + finally:
  123 + if tmp_descriptor is not None:
  124 + os.close(tmp_descriptor)
  125 + if tmp_name is not None and isfile(tmp_name):
  126 + os.unlink(tmp_name)
85 127 \ No newline at end of file
... ...