Commit d138bff0f04f215c2b75619eb0a6d52b3e346d99

Authored by Philippe Lagadec
Committed by GitHub
2 parents efa387d4 f3dbed9a

Merge pull request #446 from christian-intra2net/unittest-fixes

Unittest fixes, reduce output
oletools/crypto.py
@@ -236,7 +236,8 @@ def is_encrypted(some_file): @@ -236,7 +236,8 @@ def is_encrypted(some_file):
236 if zipfile.is_zipfile(some_file): 236 if zipfile.is_zipfile(some_file):
237 return _is_encrypted_zip(some_file) 237 return _is_encrypted_zip(some_file)
238 # otherwise assume it is the name of an ole file 238 # otherwise assume it is the name of an ole file
239 - return _is_encrypted_ole(OleFileIO(some_file)) 239 + with OleFileIO(some_file) as ole:
  240 + return _is_encrypted_ole(ole)
240 except Exception as exc: 241 except Exception as exc:
241 log.warning('Failed to check {} for encryption ({}); assume it is not ' 242 log.warning('Failed to check {} for encryption ({}); assume it is not '
242 'encrypted.'.format(some_file, exc)) 243 'encrypted.'.format(some_file, exc))
oletools/msodde.py
@@ -493,17 +493,23 @@ def process_xls(filepath): @@ -493,17 +493,23 @@ def process_xls(filepath):
493 """ find dde links in excel ole file """ 493 """ find dde links in excel ole file """
494 494
495 result = [] 495 result = []
496 - for stream in xls_parser.XlsFile(filepath).iter_streams():  
497 - if not isinstance(stream, xls_parser.WorkbookStream):  
498 - continue  
499 - for record in stream.iter_records():  
500 - if not isinstance(record, xls_parser.XlsRecordSupBook): 496 + xls_file = None
  497 + try:
  498 + xls_file = xls_parser.XlsFile(filepath)
  499 + for stream in xls_file.iter_streams():
  500 + if not isinstance(stream, xls_parser.WorkbookStream):
501 continue 501 continue
502 - if record.support_link_type in (  
503 - xls_parser.XlsRecordSupBook.LINK_TYPE_OLE_DDE,  
504 - xls_parser.XlsRecordSupBook.LINK_TYPE_EXTERNAL):  
505 - result.append(record.virt_path.replace(u'\u0003', u' '))  
506 - return u'\n'.join(result) 502 + for record in stream.iter_records():
  503 + if not isinstance(record, xls_parser.XlsRecordSupBook):
  504 + continue
  505 + if record.support_link_type in (
  506 + xls_parser.XlsRecordSupBook.LINK_TYPE_OLE_DDE,
  507 + xls_parser.XlsRecordSupBook.LINK_TYPE_EXTERNAL):
  508 + result.append(record.virt_path.replace(u'\u0003', u' '))
  509 + return u'\n'.join(result)
  510 + finally:
  511 + if xls_file is not None:
  512 + xls_file.close()
507 513
508 514
509 def process_docx(filepath, field_filter_mode=None): 515 def process_docx(filepath, field_filter_mode=None):
@@ -908,13 +914,12 @@ def process_file(filepath, field_filter_mode=None): @@ -908,13 +914,12 @@ def process_file(filepath, field_filter_mode=None):
908 if xls_parser.is_xls(filepath): 914 if xls_parser.is_xls(filepath):
909 logger.debug('Process file as excel 2003 (xls)') 915 logger.debug('Process file as excel 2003 (xls)')
910 return process_xls(filepath) 916 return process_xls(filepath)
911 -  
912 - ole = olefile.OleFileIO(filepath, path_encoding=None)  
913 - if is_ppt(ole): 917 + if is_ppt(filepath):
914 logger.debug('is ppt - cannot have DDE') 918 logger.debug('is ppt - cannot have DDE')
915 return u'' 919 return u''
916 logger.debug('Process file as word 2003 (doc)') 920 logger.debug('Process file as word 2003 (doc)')
917 - return process_doc(ole) 921 + with olefile.OleFileIO(filepath, path_encoding=None) as ole:
  922 + return process_doc(ole)
918 923
919 with open(filepath, 'rb') as file_handle: 924 with open(filepath, 'rb') as file_handle:
920 if file_handle.read(4) == RTF_START: 925 if file_handle.read(4) == RTF_START:
@@ -970,6 +975,7 @@ def process_maybe_encrypted(filepath, passwords=None, crypto_nesting=0, @@ -970,6 +975,7 @@ def process_maybe_encrypted(filepath, passwords=None, crypto_nesting=0,
970 if not crypto.is_encrypted(filepath): 975 if not crypto.is_encrypted(filepath):
971 return result 976 return result
972 except Exception: 977 except Exception:
  978 + logger.debug('Ignoring exception:', exc_info=True)
973 if not crypto.is_encrypted(filepath): 979 if not crypto.is_encrypted(filepath):
974 raise 980 raise
975 981
@@ -997,7 +1003,8 @@ def process_maybe_encrypted(filepath, passwords=None, crypto_nesting=0, @@ -997,7 +1003,8 @@ def process_maybe_encrypted(filepath, passwords=None, crypto_nesting=0,
997 try: # (maybe file was not yet created) 1003 try: # (maybe file was not yet created)
998 os.unlink(decrypted_file) 1004 os.unlink(decrypted_file)
999 except Exception: 1005 except Exception:
1000 - pass 1006 + logger.debug('Ignoring exception closing decrypted file:',
  1007 + exc_info=True)
1001 return result 1008 return result
1002 1009
1003 1010
oletools/oleobj.py
@@ -526,29 +526,35 @@ def find_ole_in_ppt(filename): @@ -526,29 +526,35 @@ def find_ole_in_ppt(filename):
526 can contain the actual embedded file we are looking for (caller will check 526 can contain the actual embedded file we are looking for (caller will check
527 for these). 527 for these).
528 """ 528 """
529 - for stream in PptFile(filename).iter_streams():  
530 - for record_idx, record in enumerate(stream.iter_records()):  
531 - if isinstance(record, PptRecordExOleVbaActiveXAtom):  
532 - ole = None  
533 - try:  
534 - data_start = next(record.iter_uncompressed())  
535 - if data_start[:len(olefile.MAGIC)] != olefile.MAGIC:  
536 - continue # could be an ActiveX control or VBA Storage  
537 -  
538 - # otherwise, this should be an OLE object  
539 - log.debug('Found record with embedded ole object in ppt '  
540 - '(stream "{0}", record no {1})'  
541 - .format(stream.name, record_idx))  
542 - ole = record.get_data_as_olefile()  
543 - yield ole  
544 - except IOError:  
545 - log.warning('Error reading data from {0} stream or '  
546 - 'interpreting it as OLE object'  
547 - .format(stream.name))  
548 - log.debug('', exc_info=True)  
549 - finally:  
550 - if ole is not None:  
551 - ole.close() 529 + ppt_file = None
  530 + try:
  531 + ppt_file = PptFile(filename)
  532 + for stream in ppt_file.iter_streams():
  533 + for record_idx, record in enumerate(stream.iter_records()):
  534 + if isinstance(record, PptRecordExOleVbaActiveXAtom):
  535 + ole = None
  536 + try:
  537 + data_start = next(record.iter_uncompressed())
  538 + if data_start[:len(olefile.MAGIC)] != olefile.MAGIC:
  539 + continue # could be ActiveX control / VBA Storage
  540 +
  541 + # otherwise, this should be an OLE object
  542 + log.debug('Found record with embedded ole object in '
  543 + 'ppt (stream "{0}", record no {1})'
  544 + .format(stream.name, record_idx))
  545 + ole = record.get_data_as_olefile()
  546 + yield ole
  547 + except IOError:
  548 + log.warning('Error reading data from {0} stream or '
  549 + 'interpreting it as OLE object'
  550 + .format(stream.name))
  551 + log.debug('', exc_info=True)
  552 + finally:
  553 + if ole is not None:
  554 + ole.close()
  555 + finally:
  556 + if ppt_file is not None:
  557 + ppt_file.close()
552 558
553 559
554 class FakeFile(io.RawIOBase): 560 class FakeFile(io.RawIOBase):
oletools/ppt_record_parser.py
@@ -63,7 +63,6 @@ except ImportError: @@ -63,7 +63,6 @@ except ImportError:
63 sys.path.insert(0, PARENT_DIR) 63 sys.path.insert(0, PARENT_DIR)
64 del PARENT_DIR 64 del PARENT_DIR
65 from oletools import record_base 65 from oletools import record_base
66 -from oletools.common.errors import CryptoErrorBase  
67 66
68 67
69 # types of relevant records (there are much more than listed here) 68 # types of relevant records (there are much more than listed here)
@@ -109,10 +108,11 @@ RECORD_TYPES = dict([ @@ -109,10 +108,11 @@ RECORD_TYPES = dict([
109 ]) 108 ])
110 109
111 110
112 -# record types where version is not 0x0 or 0xf 111 +# record types where version is not 0x0 or 0x1 or 0xf
113 VERSION_EXCEPTIONS = dict([ 112 VERSION_EXCEPTIONS = dict([
114 (0x0400, 2), # rt_vbainfoatom 113 (0x0400, 2), # rt_vbainfoatom
115 (0x03ef, 2), # rt_slideatom 114 (0x03ef, 2), # rt_slideatom
  115 + (0xe9c7, 7), # tests/test-data/encrypted/encrypted.ppt, not investigated
116 ]) 116 ])
117 117
118 118
@@ -174,7 +174,7 @@ def is_ppt(filename): @@ -174,7 +174,7 @@ def is_ppt(filename):
174 for record in stream.iter_records(): 174 for record in stream.iter_records():
175 if record.type == 0x0ff5: # UserEditAtom 175 if record.type == 0x0ff5: # UserEditAtom
176 have_user_edit = True 176 have_user_edit = True
177 - elif record.type == 0x1772: # PersisDirectoryAtom 177 + elif record.type == 0x1772: # PersistDirectoryAtom
178 have_persist_dir = True 178 have_persist_dir = True
179 elif record.type == 0x03e8: # DocumentContainer 179 elif record.type == 0x03e8: # DocumentContainer
180 have_document_container = True 180 have_document_container = True
@@ -185,10 +185,12 @@ def is_ppt(filename): @@ -185,10 +185,12 @@ def is_ppt(filename):
185 return True 185 return True
186 else: # ignore other streams/storages since they are optional 186 else: # ignore other streams/storages since they are optional
187 continue 187 continue
188 - except CryptoErrorBase:  
189 - raise  
190 - except Exception:  
191 - pass 188 + except Exception as exc:
  189 + logging.debug('Ignoring exception in is_ppt, assume is not ppt',
  190 + exc_info=True)
  191 + finally:
  192 + if ppt_file is not None:
  193 + ppt_file.close()
192 return False 194 return False
193 195
194 196
oletools/xls_parser.py
@@ -88,12 +88,18 @@ def is_xls(filename): @@ -88,12 +88,18 @@ def is_xls(filename):
88 substream. 88 substream.
89 See also: oleid.OleID.check_excel 89 See also: oleid.OleID.check_excel
90 """ 90 """
  91 + xls_file = None
91 try: 92 try:
92 - for stream in XlsFile(filename).iter_streams(): 93 + xls_file = XlsFile(filename)
  94 + for stream in xls_file.iter_streams():
93 if isinstance(stream, WorkbookStream): 95 if isinstance(stream, WorkbookStream):
94 return True 96 return True
95 except Exception: 97 except Exception:
96 - pass 98 + logging.debug('Ignoring exception in is_xls, assume is not xls',
  99 + exc_info=True)
  100 + finally:
  101 + if xls_file is not None:
  102 + xls_file.close()
97 return False 103 return False
98 104
99 105
tests/msodde/test_basic.py
@@ -9,11 +9,14 @@ Ensure that @@ -9,11 +9,14 @@ Ensure that
9 from __future__ import print_function 9 from __future__ import print_function
10 10
11 import unittest 11 import unittest
12 -from oletools import msodde  
13 -from tests.test_utils import DATA_BASE_DIR as BASE_DIR 12 +import sys
14 import os 13 import os
15 from os.path import join 14 from os.path import join
16 from traceback import print_exc 15 from traceback import print_exc
  16 +from oletools import msodde
  17 +from oletools.crypto import \
  18 + WrongEncryptionPassword, CryptoLibNotImported, check_msoffcrypto
  19 +from tests.test_utils import DATA_BASE_DIR as BASE_DIR
17 20
18 21
19 class TestReturnCode(unittest.TestCase): 22 class TestReturnCode(unittest.TestCase):
@@ -46,15 +49,21 @@ class TestReturnCode(unittest.TestCase): @@ -46,15 +49,21 @@ class TestReturnCode(unittest.TestCase):
46 49
47 def test_invalid_none(self): 50 def test_invalid_none(self):
48 """ check that no file argument leads to non-zero exit status """ 51 """ check that no file argument leads to non-zero exit status """
49 - self.do_test_validity('', True) 52 + if sys.hexversion > 0x03030000: # version 3.3 and higher
  53 + # different errors probably depending on whether msoffcrypto is
  54 + # available or not
  55 + expect_error = (AttributeError, FileNotFoundError)
  56 + else:
  57 + expect_error = (AttributeError, IOError)
  58 + self.do_test_validity('', expect_error)
50 59
51 def test_invalid_empty(self): 60 def test_invalid_empty(self):
52 """ check that empty file argument leads to non-zero exit status """ 61 """ check that empty file argument leads to non-zero exit status """
53 - self.do_test_validity(join(BASE_DIR, 'basic/empty'), True) 62 + self.do_test_validity(join(BASE_DIR, 'basic/empty'), Exception)
54 63
55 def test_invalid_text(self): 64 def test_invalid_text(self):
56 """ check that text file argument leads to non-zero exit status """ 65 """ check that text file argument leads to non-zero exit status """
57 - self.do_test_validity(join(BASE_DIR, 'basic/text'), True) 66 + self.do_test_validity(join(BASE_DIR, 'basic/text'), Exception)
58 67
59 def test_encrypted(self): 68 def test_encrypted(self):
60 """ 69 """
@@ -64,28 +73,38 @@ class TestReturnCode(unittest.TestCase): @@ -64,28 +73,38 @@ class TestReturnCode(unittest.TestCase):
64 Encryption) is tested. 73 Encryption) is tested.
65 """ 74 """
66 CRYPT_DIR = join(BASE_DIR, 'encrypted') 75 CRYPT_DIR = join(BASE_DIR, 'encrypted')
67 - ADD_ARGS = '', '-j', '-d', '-f', '-a' 76 + have_crypto = check_msoffcrypto()
68 for filename in os.listdir(CRYPT_DIR): 77 for filename in os.listdir(CRYPT_DIR):
69 - full_name = join(CRYPT_DIR, filename)  
70 - for args in ADD_ARGS:  
71 - self.do_test_validity(args + ' ' + full_name, True)  
72 -  
73 - def do_test_validity(self, args, expect_error=False):  
74 - """ helper for test_valid_doc[x] """  
75 - have_exception = False 78 + if have_crypto and 'standardpassword' in filename:
  79 + # these are automagically decrypted
  80 + self.do_test_validity(join(CRYPT_DIR, filename))
  81 + elif have_crypto:
  82 + self.do_test_validity(join(CRYPT_DIR, filename),
  83 + WrongEncryptionPassword)
  84 + else:
  85 + self.do_test_validity(join(CRYPT_DIR, filename),
  86 + CryptoLibNotImported)
  87 +
  88 + def do_test_validity(self, filename, expect_error=None):
  89 + """ helper for test_[in]valid_* """
  90 + found_error = None
  91 + # DEBUG: print('Testing file {}'.format(filename))
76 try: 92 try:
77 - msodde.process_file(args, msodde.FIELD_FILTER_BLACKLIST)  
78 - except Exception:  
79 - have_exception = True  
80 - print_exc()  
81 - except SystemExit as exc: # sys.exit() was called  
82 - have_exception = True  
83 - if exc.code is None:  
84 - have_exception = False  
85 -  
86 - self.assertEqual(expect_error, have_exception,  
87 - msg='Args={0}, expect={1}, exc={2}'  
88 - .format(args, expect_error, have_exception)) 93 + msodde.process_maybe_encrypted(filename,
  94 + field_filter_mode=msodde.FIELD_FILTER_BLACKLIST)
  95 + except Exception as exc:
  96 + found_error = exc
  97 + # DEBUG: print_exc()
  98 +
  99 + if expect_error and not found_error:
  100 + self.fail('Expected {} but msodde finished without errors for {}'
  101 + .format(expect_error, filename))
  102 + elif not expect_error and found_error:
  103 + self.fail('Unexpected error {} from msodde for {}'
  104 + .format(found_error, filename))
  105 + elif expect_error and not isinstance(found_error, expect_error):
  106 + self.fail('Wrong kind of error {} from msodde for {}, expected {}'
  107 + .format(type(found_error), filename, expect_error))
89 108
90 109
91 class TestDdeLinks(unittest.TestCase): 110 class TestDdeLinks(unittest.TestCase):
@@ -100,24 +119,27 @@ class TestDdeLinks(unittest.TestCase): @@ -100,24 +119,27 @@ class TestDdeLinks(unittest.TestCase):
100 def test_with_dde(self): 119 def test_with_dde(self):
101 """ check that dde links appear on stdout """ 120 """ check that dde links appear on stdout """
102 filename = 'dde-test-from-office2003.doc' 121 filename = 'dde-test-from-office2003.doc'
103 - output = msodde.process_file(  
104 - join(BASE_DIR, 'msodde', filename), msodde.FIELD_FILTER_BLACKLIST) 122 + output = msodde.process_maybe_encrypted(
  123 + join(BASE_DIR, 'msodde', filename),
  124 + field_filter_mode=msodde.FIELD_FILTER_BLACKLIST)
105 self.assertNotEqual(len(self.get_dde_from_output(output)), 0, 125 self.assertNotEqual(len(self.get_dde_from_output(output)), 0,
106 msg='Found no dde links in output of ' + filename) 126 msg='Found no dde links in output of ' + filename)
107 127
108 def test_no_dde(self): 128 def test_no_dde(self):
109 """ check that no dde links appear on stdout """ 129 """ check that no dde links appear on stdout """
110 filename = 'harmless-clean.doc' 130 filename = 'harmless-clean.doc'
111 - output = msodde.process_file(  
112 - join(BASE_DIR, 'msodde', filename), msodde.FIELD_FILTER_BLACKLIST) 131 + output = msodde.process_maybe_encrypted(
  132 + join(BASE_DIR, 'msodde', filename),
  133 + field_filter_mode=msodde.FIELD_FILTER_BLACKLIST)
113 self.assertEqual(len(self.get_dde_from_output(output)), 0, 134 self.assertEqual(len(self.get_dde_from_output(output)), 0,
114 msg='Found dde links in output of ' + filename) 135 msg='Found dde links in output of ' + filename)
115 136
116 def test_with_dde_utf16le(self): 137 def test_with_dde_utf16le(self):
117 """ check that dde links appear on stdout """ 138 """ check that dde links appear on stdout """
118 filename = 'dde-test-from-office2013-utf_16le-korean.doc' 139 filename = 'dde-test-from-office2013-utf_16le-korean.doc'
119 - output = msodde.process_file(  
120 - join(BASE_DIR, 'msodde', filename), msodde.FIELD_FILTER_BLACKLIST) 140 + output = msodde.process_maybe_encrypted(
  141 + join(BASE_DIR, 'msodde', filename),
  142 + field_filter_mode=msodde.FIELD_FILTER_BLACKLIST)
121 self.assertNotEqual(len(self.get_dde_from_output(output)), 0, 143 self.assertNotEqual(len(self.get_dde_from_output(output)), 0,
122 msg='Found no dde links in output of ' + filename) 144 msg='Found no dde links in output of ' + filename)
123 145
@@ -125,8 +147,9 @@ class TestDdeLinks(unittest.TestCase): @@ -125,8 +147,9 @@ class TestDdeLinks(unittest.TestCase):
125 """ check that dde links are found in excel 2007+ files """ 147 """ check that dde links are found in excel 2007+ files """
126 expect = ['cmd /c calc.exe', ] 148 expect = ['cmd /c calc.exe', ]
127 for extn in 'xlsx', 'xlsm', 'xlsb': 149 for extn in 'xlsx', 'xlsm', 'xlsb':
128 - output = msodde.process_file(  
129 - join(BASE_DIR, 'msodde', 'dde-test.' + extn), msodde.FIELD_FILTER_BLACKLIST) 150 + output = msodde.process_maybe_encrypted(
  151 + join(BASE_DIR, 'msodde', 'dde-test.' + extn),
  152 + field_filter_mode=msodde.FIELD_FILTER_BLACKLIST)
130 153
131 self.assertEqual(expect, self.get_dde_from_output(output), 154 self.assertEqual(expect, self.get_dde_from_output(output),
132 msg='unexpected output for dde-test.{0}: {1}' 155 msg='unexpected output for dde-test.{0}: {1}'
@@ -136,8 +159,9 @@ class TestDdeLinks(unittest.TestCase): @@ -136,8 +159,9 @@ class TestDdeLinks(unittest.TestCase):
136 """ check that dde in xml from word / excel is found """ 159 """ check that dde in xml from word / excel is found """
137 for name_part in 'excel2003', 'word2003', 'word2007': 160 for name_part in 'excel2003', 'word2003', 'word2007':
138 filename = 'dde-in-' + name_part + '.xml' 161 filename = 'dde-in-' + name_part + '.xml'
139 - output = msodde.process_file(  
140 - join(BASE_DIR, 'msodde', filename), msodde.FIELD_FILTER_BLACKLIST) 162 + output = msodde.process_maybe_encrypted(
  163 + join(BASE_DIR, 'msodde', filename),
  164 + field_filter_mode=msodde.FIELD_FILTER_BLACKLIST)
141 links = self.get_dde_from_output(output) 165 links = self.get_dde_from_output(output)
142 self.assertEqual(len(links), 1, 'found {0} dde-links in {1}' 166 self.assertEqual(len(links), 1, 'found {0} dde-links in {1}'
143 .format(len(links), filename)) 167 .format(len(links), filename))
@@ -149,15 +173,17 @@ class TestDdeLinks(unittest.TestCase): @@ -149,15 +173,17 @@ class TestDdeLinks(unittest.TestCase):
149 def test_clean_rtf_blacklist(self): 173 def test_clean_rtf_blacklist(self):
150 """ find a lot of hyperlinks in rtf spec """ 174 """ find a lot of hyperlinks in rtf spec """
151 filename = 'RTF-Spec-1.7.rtf' 175 filename = 'RTF-Spec-1.7.rtf'
152 - output = msodde.process_file(  
153 - join(BASE_DIR, 'msodde', filename), msodde.FIELD_FILTER_BLACKLIST) 176 + output = msodde.process_maybe_encrypted(
  177 + join(BASE_DIR, 'msodde', filename),
  178 + field_filter_mode=msodde.FIELD_FILTER_BLACKLIST)
154 self.assertEqual(len(self.get_dde_from_output(output)), 1413) 179 self.assertEqual(len(self.get_dde_from_output(output)), 1413)
155 180
156 def test_clean_rtf_ddeonly(self): 181 def test_clean_rtf_ddeonly(self):
157 """ find no dde links in rtf spec """ 182 """ find no dde links in rtf spec """
158 filename = 'RTF-Spec-1.7.rtf' 183 filename = 'RTF-Spec-1.7.rtf'
159 - output = msodde.process_file(  
160 - join(BASE_DIR, 'msodde', filename), msodde.FIELD_FILTER_DDE) 184 + output = msodde.process_maybe_encrypted(
  185 + join(BASE_DIR, 'msodde', filename),
  186 + field_filter_mode=msodde.FIELD_FILTER_DDE)
161 self.assertEqual(len(self.get_dde_from_output(output)), 0, 187 self.assertEqual(len(self.get_dde_from_output(output)), 0,
162 msg='Found dde links in output of ' + filename) 188 msg='Found dde links in output of ' + filename)
163 189
tests/msodde/test_crypto.py
@@ -4,10 +4,9 @@ import sys @@ -4,10 +4,9 @@ import sys
4 import unittest 4 import unittest
5 from os.path import basename, join as pjoin 5 from os.path import basename, join as pjoin
6 6
7 -from tests.test_utils import DATA_BASE_DIR 7 +from tests.test_utils import DATA_BASE_DIR, call_and_capture
8 8
9 from oletools import crypto 9 from oletools import crypto
10 -from oletools import msodde  
11 10
12 11
13 @unittest.skipIf(not crypto.check_msoffcrypto(), 12 @unittest.skipIf(not crypto.check_msoffcrypto(),
@@ -15,15 +14,18 @@ from oletools import msodde @@ -15,15 +14,18 @@ from oletools import msodde
15 .format(basename(sys.executable))) 14 .format(basename(sys.executable)))
16 class MsoddeCryptoTest(unittest.TestCase): 15 class MsoddeCryptoTest(unittest.TestCase):
17 """Test integration of decryption in msodde.""" 16 """Test integration of decryption in msodde."""
  17 +
18 def test_standard_password(self): 18 def test_standard_password(self):
19 """Check dde-link is found in xls[mb] sample files.""" 19 """Check dde-link is found in xls[mb] sample files."""
20 for suffix in 'xls', 'xlsx', 'xlsm', 'xlsb': 20 for suffix in 'xls', 'xlsx', 'xlsm', 'xlsb':
21 example_file = pjoin(DATA_BASE_DIR, 'encrypted', 21 example_file = pjoin(DATA_BASE_DIR, 'encrypted',
22 'dde-test-encrypt-standardpassword.' + suffix) 22 'dde-test-encrypt-standardpassword.' + suffix)
23 - link_text = msodde.process_maybe_encrypted(example_file)  
24 - self.assertEqual(link_text, 'cmd /c calc.exe',  
25 - msg='Unexpected output {!r} for {}'  
26 - .format(link_text, suffix)) 23 + output, _ = call_and_capture('msodde', [example_file, ])
  24 + self.assertIn('\nDDE Links:\ncmd /c calc.exe\n', output,
  25 + msg='Unexpected output {!r} for {}'
  26 + .format(output, suffix))
  27 +
  28 + # TODO: add more, in particular a sample with a "proper" password
27 29
28 30
29 if __name__ == '__main__': 31 if __name__ == '__main__':
tests/oleobj/test_basic.py
@@ -8,7 +8,7 @@ from hashlib import md5 @@ -8,7 +8,7 @@ from hashlib import md5
8 from glob import glob 8 from glob import glob
9 9
10 # Directory with test data, independent of current working directory 10 # Directory with test data, independent of current working directory
11 -from tests.test_utils import DATA_BASE_DIR 11 +from tests.test_utils import DATA_BASE_DIR, call_and_capture
12 from oletools import oleobj 12 from oletools import oleobj
13 13
14 14
@@ -81,10 +81,6 @@ class TestOleObj(unittest.TestCase): @@ -81,10 +81,6 @@ class TestOleObj(unittest.TestCase):
81 """ fixture start: create temp dir """ 81 """ fixture start: create temp dir """
82 self.temp_dir = mkdtemp(prefix='oletools-oleobj-') 82 self.temp_dir = mkdtemp(prefix='oletools-oleobj-')
83 self.did_fail = False 83 self.did_fail = False
84 - if DEBUG:  
85 - import logging  
86 - logging.basicConfig(level=logging.DEBUG if DEBUG else logging.INFO)  
87 - oleobj.log.setLevel(logging.NOTSET)  
88 84
89 def tearDown(self): 85 def tearDown(self):
90 """ fixture end: remove temp dir """ 86 """ fixture end: remove temp dir """
@@ -101,7 +97,8 @@ class TestOleObj(unittest.TestCase): @@ -101,7 +97,8 @@ class TestOleObj(unittest.TestCase):
101 """ 97 """
102 test that oleobj can be called with -i and -v 98 test that oleobj can be called with -i and -v
103 99
104 - this is the way that amavisd calls oleobj, thinking it is ripOLE 100 + This is how ripOLE used to be often called (e.g. by amavisd-new);
  101 + ensure oleobj is a compatible replacement.
105 """ 102 """
106 self.do_test_md5(['-d', self.temp_dir, '-v', '-i']) 103 self.do_test_md5(['-d', self.temp_dir, '-v', '-i'])
107 104
@@ -112,35 +109,52 @@ class TestOleObj(unittest.TestCase): @@ -112,35 +109,52 @@ class TestOleObj(unittest.TestCase):
112 'embedded-simple-2007.xml', 109 'embedded-simple-2007.xml',
113 'embedded-simple-2007-as2003.xml'): 110 'embedded-simple-2007-as2003.xml'):
114 full_name = join(DATA_BASE_DIR, 'oleobj', sample_name) 111 full_name = join(DATA_BASE_DIR, 'oleobj', sample_name)
115 - ret_val = oleobj.main(args + [full_name, ]) 112 + output, ret_val = call_and_capture('oleobj', args + [full_name, ],
  113 + accept_nonzero_exit=True)
116 if glob(self.temp_dir + 'ole-object-*'): 114 if glob(self.temp_dir + 'ole-object-*'):
117 - self.fail('found embedded data in {0}'.format(sample_name))  
118 - self.assertEqual(ret_val, oleobj.RETURN_NO_DUMP) 115 + self.fail('found embedded data in {0}. Output:\n{1}'
  116 + .format(sample_name, output))
  117 + self.assertEqual(ret_val, oleobj.RETURN_NO_DUMP,
  118 + msg='Wrong return value {} for {}. Output:\n{}'
  119 + .format(ret_val, sample_name, output))
119 120
120 - def do_test_md5(self, args, test_fun=oleobj.main): 121 + def do_test_md5(self, args, test_fun=None, only_run_every=1):
121 """ helper for test_md5 and test_md5_args """ 122 """ helper for test_md5 and test_md5_args """
122 - # name of sample, extension of embedded file, md5 hash of embedded file  
123 data_dir = join(DATA_BASE_DIR, 'oleobj') 123 data_dir = join(DATA_BASE_DIR, 'oleobj')
124 - for sample_name, embedded_name, expect_hash in SAMPLES:  
125 - ret_val = test_fun(args + [join(data_dir, sample_name), ])  
126 - self.assertEqual(ret_val, oleobj.RETURN_DID_DUMP) 124 +
  125 + # name of sample, extension of embedded file, md5 hash of embedded file
  126 + for sample_index, (sample_name, embedded_name, expect_hash) \
  127 + in enumerate(SAMPLES):
  128 + if sample_index % only_run_every != 0:
  129 + continue
  130 + args_with_path = args + [join(data_dir, sample_name), ]
  131 + if test_fun is None:
  132 + output, ret_val = call_and_capture('oleobj', args_with_path,
  133 + accept_nonzero_exit=True)
  134 + else:
  135 + ret_val = test_fun(args_with_path)
  136 + output = '[output: see above]'
  137 + self.assertEqual(ret_val, oleobj.RETURN_DID_DUMP,
  138 + msg='Wrong return value {} for {}. Output:\n{}'
  139 + .format(ret_val, sample_name, output))
127 expect_name = join(self.temp_dir, 140 expect_name = join(self.temp_dir,
128 sample_name + '_' + embedded_name) 141 sample_name + '_' + embedded_name)
129 if not isfile(expect_name): 142 if not isfile(expect_name):
130 self.did_fail = True 143 self.did_fail = True
131 - self.fail('{0} not created from {1}'.format(expect_name,  
132 - sample_name)) 144 + self.fail('{0} not created from {1}. Output:\n{2}'
  145 + .format(expect_name, sample_name, output))
133 continue 146 continue
134 md5_hash = calc_md5(expect_name) 147 md5_hash = calc_md5(expect_name)
135 if md5_hash != expect_hash: 148 if md5_hash != expect_hash:
136 self.did_fail = True 149 self.did_fail = True
137 - self.fail('Wrong md5 {0} of {1} from {2}'  
138 - .format(md5_hash, expect_name, sample_name)) 150 + self.fail('Wrong md5 {0} of {1} from {2}. Output:\n{3}'
  151 + .format(md5_hash, expect_name, sample_name, output))
139 continue 152 continue
140 153
141 def test_non_streamed(self): 154 def test_non_streamed(self):
142 """ Ensure old oleobj behaviour still works: pre-read whole file """ 155 """ Ensure old oleobj behaviour still works: pre-read whole file """
143 - return self.do_test_md5(['-d', self.temp_dir], test_fun=preread_file) 156 + return self.do_test_md5(['-d', self.temp_dir], test_fun=preread_file,
  157 + only_run_every=4)
144 158
145 159
146 # just in case somebody calls this file as a script 160 # just in case somebody calls this file as a script
tests/oleobj/test_external_links.py
@@ -6,7 +6,7 @@ import os @@ -6,7 +6,7 @@ import os
6 from os import path 6 from os import path
7 7
8 # Directory with test data, independent of current working directory 8 # Directory with test data, independent of current working directory
9 -from tests.test_utils import DATA_BASE_DIR 9 +from tests.test_utils import DATA_BASE_DIR, call_and_capture
10 from oletools import oleobj 10 from oletools import oleobj
11 11
12 BASE_DIR = path.join(DATA_BASE_DIR, 'oleobj', 'external_link') 12 BASE_DIR = path.join(DATA_BASE_DIR, 'oleobj', 'external_link')
@@ -22,8 +22,11 @@ class TestExternalLinks(unittest.TestCase): @@ -22,8 +22,11 @@ class TestExternalLinks(unittest.TestCase):
22 for filename in filenames: 22 for filename in filenames:
23 file_path = path.join(dirpath, filename) 23 file_path = path.join(dirpath, filename)
24 24
25 - ret_val = oleobj.main([file_path])  
26 - self.assertEqual(ret_val, oleobj.RETURN_DID_DUMP) 25 + output, ret_val = call_and_capture('oleobj', [file_path, ],
  26 + accept_nonzero_exit=True)
  27 + self.assertEqual(ret_val, oleobj.RETURN_DID_DUMP,
  28 + msg='Wrong return value {} for {}. Output:\n{}'
  29 + .format(ret_val, filename, output))
27 30
28 31
29 # just in case somebody calls this file as a script 32 # just in case somebody calls this file as a script
tests/ooxml/test_zip_sub_file.py
@@ -144,15 +144,15 @@ class TestZipSubFile(unittest.TestCase): @@ -144,15 +144,15 @@ class TestZipSubFile(unittest.TestCase):
144 self.subfile.seek(0, os.SEEK_END) 144 self.subfile.seek(0, os.SEEK_END)
145 self.compare.seek(0, os.SEEK_END) 145 self.compare.seek(0, os.SEEK_END)
146 146
147 - self.assertEquals(self.compare.read(10), self.subfile.read(10))  
148 - self.assertEquals(self.compare.tell(), self.subfile.tell()) 147 + self.assertEqual(self.compare.read(10), self.subfile.read(10))
  148 + self.assertEqual(self.compare.tell(), self.subfile.tell())
149 149
150 self.subfile.seek(0) 150 self.subfile.seek(0)
151 self.compare.seek(0) 151 self.compare.seek(0)
152 self.subfile.seek(len(FILE_CONTENTS) - 1) 152 self.subfile.seek(len(FILE_CONTENTS) - 1)
153 self.compare.seek(len(FILE_CONTENTS) - 1) 153 self.compare.seek(len(FILE_CONTENTS) - 1)
154 - self.assertEquals(self.compare.read(10), self.subfile.read(10))  
155 - self.assertEquals(self.compare.tell(), self.subfile.tell()) 154 + self.assertEqual(self.compare.read(10), self.subfile.read(10))
  155 + self.assertEqual(self.compare.tell(), self.subfile.tell())
156 156
157 def test_error_seek(self): 157 def test_error_seek(self):
158 """ test correct behaviour if seek beyond end (no exception) """ 158 """ test correct behaviour if seek beyond end (no exception) """
tests/test_utils/utils.py
@@ -47,6 +47,13 @@ def call_and_capture(module, args=None, accept_nonzero_exit=False, @@ -47,6 +47,13 @@ def call_and_capture(module, args=None, accept_nonzero_exit=False,
47 except KeyError: 47 except KeyError:
48 env['PYTHONPATH'] = SOURCE_BASE_DIR 48 env['PYTHONPATH'] = SOURCE_BASE_DIR
49 49
  50 + # hack: in python2 output encoding (sys.stdout.encoding) was None
  51 + # although sys.getdefaultencoding() and sys.getfilesystemencoding() were ok
  52 + # TODO: maybe can remove this once branch
  53 + # "encoding-for-non-unicode-environments" is merged
  54 + if 'PYTHONIOENCODING' not in env:
  55 + env['PYTHONIOENCODING'] = 'utf8'
  56 +
50 # ensure args is a tuple 57 # ensure args is a tuple
51 my_args = tuple(args) if args else () 58 my_args = tuple(args) if args else ()
52 59