Commit cf1532fcc4ba929f10c06375970ef4e5e5f89d38
1 parent
f0b17938
tests: Re-create oleid tests
With the creation of ftguess oleid has been changed a lot, the output of its .check() method is very different. Instead of just adapting the existing unittests, I re-created the complete test, trying to make it easier to change/extend in the future.
Showing
1 changed file
with
118 additions
and
162 deletions
tests/oleid/test_basic.py
| @@ -8,174 +8,130 @@ import unittest | @@ -8,174 +8,130 @@ import unittest | ||
| 8 | import os | 8 | import os |
| 9 | from os.path import join, relpath, splitext | 9 | from os.path import join, relpath, splitext |
| 10 | from oletools import oleid | 10 | from oletools import oleid |
| 11 | +from oletools.ftguess import CONTAINER | ||
| 11 | 12 | ||
| 12 | -# Directory with test data, independent of current working directory | ||
| 13 | -from tests.test_utils import DATA_BASE_DIR | 13 | +from tests.test_utils.testdata_reader import loop_over_files, DATA_BASE_DIR |
| 14 | 14 | ||
| 15 | 15 | ||
| 16 | class TestOleIDBasic(unittest.TestCase): | 16 | class TestOleIDBasic(unittest.TestCase): |
| 17 | """Test basic functionality of OleID""" | 17 | """Test basic functionality of OleID""" |
| 18 | 18 | ||
| 19 | - def test_all(self): | ||
| 20 | - """Run all file in test-data through oleid and compare to known ouput""" | ||
| 21 | - # this relies on order of indicators being constant, could relax that | ||
| 22 | - # Also requires that files have the correct suffixes (no rtf in doc) | ||
| 23 | - NON_OLE_SUFFIXES = ('.xml', '.csv', '.rtf', '', '.odt', '.ods', '.odp') | ||
| 24 | - NON_OLE_VALUES = (False, ) | ||
| 25 | - WORD = b'Microsoft Office Word' | ||
| 26 | - PPT = b'Microsoft Office PowerPoint' | ||
| 27 | - EXCEL = b'Microsoft Excel' | ||
| 28 | - CRYPT = (True, False, 'unknown', True, False, False, False, False, | ||
| 29 | - False, False, 0) | ||
| 30 | - OLE_VALUES = { | ||
| 31 | - 'oleobj/sample_with_lnk_file.doc': (True, True, WORD, False, True, | ||
| 32 | - False, False, False, False, | ||
| 33 | - True, 0), | ||
| 34 | - 'oleobj/embedded-simple-2007.xlsb': (False,), | ||
| 35 | - 'oleobj/embedded-simple-2007.docm': (False,), | ||
| 36 | - 'oleobj/embedded-simple-2007.xltx': (False,), | ||
| 37 | - 'oleobj/embedded-simple-2007.xlam': (False,), | ||
| 38 | - 'oleobj/embedded-simple-2007.dotm': (False,), | ||
| 39 | - 'oleobj/sample_with_lnk_file.ppt': (True, True, PPT, False, False, | ||
| 40 | - False, False, True, False, | ||
| 41 | - False, 0), | ||
| 42 | - 'oleobj/embedded-simple-2007.xlsx': (False,), | ||
| 43 | - 'oleobj/embedded-simple-2007.xlsm': (False,), | ||
| 44 | - 'oleobj/embedded-simple-2007.ppsx': (False,), | ||
| 45 | - 'oleobj/embedded-simple-2007.pps': (True, True, PPT, False, False, | ||
| 46 | - False, False, True, False, | ||
| 47 | - False, 0), | ||
| 48 | - 'oleobj/embedded-simple-2007.xla': (True, True, EXCEL, False, | ||
| 49 | - False, False, True, False, | ||
| 50 | - False, False, 0), | ||
| 51 | - 'oleobj/sample_with_calc_embedded.doc': (True, True, WORD, False, | ||
| 52 | - True, False, False, False, | ||
| 53 | - False, True, 0), | ||
| 54 | - 'oleobj/embedded-unicode-2007.docx': (False,), | ||
| 55 | - 'oleobj/embedded-unicode.doc': (True, True, WORD, False, True, | ||
| 56 | - False, False, False, False, True, | ||
| 57 | - 0), | ||
| 58 | - 'oleobj/embedded-simple-2007.doc': (True, True, WORD, False, True, | ||
| 59 | - False, False, False, False, | ||
| 60 | - True, 0), | ||
| 61 | - 'oleobj/embedded-simple-2007.xls': (True, True, EXCEL, False, | ||
| 62 | - False, False, True, False, | ||
| 63 | - False, False, 0), | ||
| 64 | - 'oleobj/embedded-simple-2007.dot': (True, True, WORD, False, True, | ||
| 65 | - False, False, False, False, | ||
| 66 | - True, 0), | ||
| 67 | - 'oleobj/sample_with_lnk_to_calc.doc': (True, True, WORD, False, | ||
| 68 | - True, False, False, False, | ||
| 69 | - False, True, 0), | ||
| 70 | - 'oleobj/embedded-simple-2007.ppt': (True, True, PPT, False, False, | ||
| 71 | - False, False, True, False, | ||
| 72 | - False, 0), | ||
| 73 | - 'oleobj/sample_with_lnk_file.pps': (True, True, PPT, False, False, | ||
| 74 | - False, False, True, False, | ||
| 75 | - False, 0), | ||
| 76 | - 'oleobj/embedded-simple-2007.pptx': (False,), | ||
| 77 | - 'oleobj/embedded-simple-2007.ppsm': (False,), | ||
| 78 | - 'oleobj/embedded-simple-2007.dotx': (False,), | ||
| 79 | - 'oleobj/embedded-simple-2007.pptm': (False,), | ||
| 80 | - 'oleobj/embedded-simple-2007.xlt': (True, True, EXCEL, False, | ||
| 81 | - False, False, True, False, | ||
| 82 | - False, False, 0), | ||
| 83 | - 'oleobj/embedded-simple-2007.docx': (False,), | ||
| 84 | - 'oleobj/embedded-simple-2007.potx': (False,), | ||
| 85 | - 'oleobj/embedded-simple-2007.pot': (True, True, PPT, False, False, | ||
| 86 | - False, False, True, False, | ||
| 87 | - False, 0), | ||
| 88 | - 'oleobj/embedded-simple-2007.xltm': (False,), | ||
| 89 | - 'oleobj/embedded-simple-2007.potm': (False,), | ||
| 90 | - 'encrypted/encrypted.xlsx': CRYPT, | ||
| 91 | - 'encrypted/encrypted.docm': CRYPT, | ||
| 92 | - 'encrypted/encrypted.docx': CRYPT, | ||
| 93 | - 'encrypted/encrypted.pptm': CRYPT, | ||
| 94 | - 'encrypted/encrypted.xlsb': CRYPT, | ||
| 95 | - 'encrypted/encrypted.xls': (True, True, EXCEL, True, False, False, | ||
| 96 | - True, False, False, False, 0), | ||
| 97 | - 'encrypted/encrypted.ppt': (True, False, 'unknown', True, False, | ||
| 98 | - False, False, True, False, False, 0), | ||
| 99 | - 'encrypted/encrypted.pptx': CRYPT, | ||
| 100 | - 'encrypted/encrypted.xlsm': CRYPT, | ||
| 101 | - 'encrypted/encrypted.doc': (True, True, WORD, True, True, False, | ||
| 102 | - False, False, False, False, 0), | ||
| 103 | - 'msodde/harmless-clean.docm': (False,), | ||
| 104 | - 'msodde/dde-in-csv.csv': (False,), | ||
| 105 | - 'msodde/dde-test-from-office2013-utf_16le-korean.doc': | ||
| 106 | - (True, True, WORD, False, True, False, False, False, False, | ||
| 107 | - False, 0), | ||
| 108 | - 'msodde/harmless-clean.doc': (True, True, WORD, False, True, False, | ||
| 109 | - False, False, False, False, 0), | ||
| 110 | - 'msodde/dde-test.docm': (False,), | ||
| 111 | - 'msodde/dde-test.xlsb': (False,), | ||
| 112 | - 'msodde/dde-test.xlsm': (False,), | ||
| 113 | - 'msodde/dde-test.docx': (False,), | ||
| 114 | - 'msodde/dde-test.xlsx': (False,), | ||
| 115 | - 'msodde/dde-test-from-office2003.doc': (True, True, WORD, False, | ||
| 116 | - True, False, False, False, | ||
| 117 | - False, False, 0), | ||
| 118 | - 'msodde/dde-test-from-office2016.doc': (True, True, WORD, False, | ||
| 119 | - True, False, False, False, | ||
| 120 | - False, False, 0), | ||
| 121 | - 'msodde/harmless-clean.docx': (False,), | ||
| 122 | - 'oleform/oleform-PR314.docm': (False,), | ||
| 123 | - 'basic/encrypted.docx': CRYPT, | ||
| 124 | - 'oleobj/external_link/sample_with_external_link_to_doc.docx': (False,), | ||
| 125 | - 'oleobj/external_link/sample_with_external_link_to_doc.xlsb': (False,), | ||
| 126 | - 'oleobj/external_link/sample_with_external_link_to_doc.dotm': (False,), | ||
| 127 | - 'oleobj/external_link/sample_with_external_link_to_doc.xlsm': (False,), | ||
| 128 | - 'oleobj/external_link/sample_with_external_link_to_doc.pptx': (False,), | ||
| 129 | - 'oleobj/external_link/sample_with_external_link_to_doc.dotx': (False,), | ||
| 130 | - 'oleobj/external_link/sample_with_external_link_to_doc.docm': (False,), | ||
| 131 | - 'oleobj/external_link/sample_with_external_link_to_doc.potm': (False,), | ||
| 132 | - 'oleobj/external_link/sample_with_external_link_to_doc.xlsx': (False,), | ||
| 133 | - 'oleobj/external_link/sample_with_external_link_to_doc.potx': (False,), | ||
| 134 | - 'oleobj/external_link/sample_with_external_link_to_doc.ppsm': (False,), | ||
| 135 | - 'oleobj/external_link/sample_with_external_link_to_doc.pptm': (False,), | ||
| 136 | - 'oleobj/external_link/sample_with_external_link_to_doc.ppsx': (False,), | ||
| 137 | - 'encrypted/autostart-encrypt-standardpassword.xlsm': | ||
| 138 | - (True, False, 'unknown', True, False, False, False, False, False, False, 0), | ||
| 139 | - 'encrypted/autostart-encrypt-standardpassword.xls': | ||
| 140 | - (True, True, EXCEL, True, False, True, True, False, False, False, 0), | ||
| 141 | - 'encrypted/dde-test-encrypt-standardpassword.xlsx': | ||
| 142 | - (True, False, 'unknown', True, False, False, False, False, False, False, 0), | ||
| 143 | - 'encrypted/dde-test-encrypt-standardpassword.xlsm': | ||
| 144 | - (True, False, 'unknown', True, False, False, False, False, False, False, 0), | ||
| 145 | - 'encrypted/autostart-encrypt-standardpassword.xlsb': | ||
| 146 | - (True, False, 'unknown', True, False, False, False, False, False, False, 0), | ||
| 147 | - 'encrypted/dde-test-encrypt-standardpassword.xls': | ||
| 148 | - (True, True, EXCEL, True, False, False, True, False, False, False, 0), | ||
| 149 | - 'encrypted/dde-test-encrypt-standardpassword.xlsb': | ||
| 150 | - (True, False, 'unknown', True, False, False, False, False, False, False, 0), | ||
| 151 | - } | ||
| 152 | - | ||
| 153 | - indicator_names = [] | ||
| 154 | - for base_dir, _, files in os.walk(DATA_BASE_DIR): | ||
| 155 | - for filename in files: | ||
| 156 | - full_path = join(base_dir, filename) | ||
| 157 | - name = relpath(full_path, DATA_BASE_DIR) | ||
| 158 | - values = tuple(indicator.value for indicator in | ||
| 159 | - oleid.OleID(full_path).check()) | ||
| 160 | - if len(indicator_names) < 2: # not initialized with ole yet | ||
| 161 | - indicator_names = tuple(indicator.name for indicator in | ||
| 162 | - oleid.OleID(full_path).check()) | ||
| 163 | - suffix = splitext(filename)[1] | ||
| 164 | - if suffix in NON_OLE_SUFFIXES: | ||
| 165 | - self.assertEqual(values, NON_OLE_VALUES, | ||
| 166 | - msg='For non-ole file {} expected {}, ' | ||
| 167 | - 'not {}'.format(name, NON_OLE_VALUES, | ||
| 168 | - values)) | ||
| 169 | - continue | ||
| 170 | - try: | ||
| 171 | - self.assertEqual(values, OLE_VALUES[name], | ||
| 172 | - msg='Wrong detail values for {}:\n' | ||
| 173 | - ' Names {}\n Found {}\n Expect {}' | ||
| 174 | - .format(name, indicator_names, values, | ||
| 175 | - OLE_VALUES[name])) | ||
| 176 | - except KeyError: | ||
| 177 | - print('Should add oleid output for {} to {} ({})' | ||
| 178 | - .format(name, __name__, values)) | 19 | + def setUp(self): |
| 20 | + """Called before tests; populates self.oleids""" | ||
| 21 | + self.oleids = [] | ||
| 22 | + for filename, file_contents in loop_over_files(): | ||
| 23 | + curr_id = oleid.OleID(filename=filename, data=file_contents) | ||
| 24 | + value_dict = dict((ind.id, ind.value) for ind in curr_id.check()) | ||
| 25 | + self.oleids.append((filename, value_dict)) | ||
| 26 | + | ||
| 27 | + # note: indicators "ftype" and "container" are from ftguess, | ||
| 28 | + # so tested there, already | ||
| 29 | + | ||
| 30 | + def test_properties(self): | ||
| 31 | + """Test indicators "appname", "codepage" and "author" of ole files.""" | ||
| 32 | + for filename, value_dict in self.oleids: | ||
| 33 | + # print('Debugging: testing file {0}'.format(filename)) | ||
| 34 | + if value_dict['container'] != CONTAINER.OLE: | ||
| 35 | + self.assertNotIn('appname', value_dict) | ||
| 36 | + self.assertNotIn('codepage', value_dict) | ||
| 37 | + self.assertNotIn('author', value_dict) | ||
| 38 | + continue | ||
| 39 | + | ||
| 40 | + before_dot, suffix = splitext(filename) | ||
| 41 | + if suffix == '.zip': | ||
| 42 | + suffix = splitext(before_dot)[1] | ||
| 43 | + | ||
| 44 | + if 'encrypted' in filename \ | ||
| 45 | + and suffix != '.xls' and suffix != '.doc': | ||
| 46 | + self.assertEqual(value_dict['appname'], None) | ||
| 47 | + self.assertEqual(value_dict['codepage'], None) | ||
| 48 | + self.assertEqual(value_dict['author'], None) | ||
| 49 | + continue | ||
| 50 | + | ||
| 51 | + if suffix.startswith('.d'): | ||
| 52 | + self.assertEqual(value_dict['appname'], | ||
| 53 | + b'Microsoft Office Word') | ||
| 54 | + elif suffix.startswith('.x'): | ||
| 55 | + self.assertIn(value_dict['appname'], | ||
| 56 | + (b'Microsoft Office Excel', b'Microsoft Excel')) | ||
| 57 | + # old types have no "Office" in the app name | ||
| 58 | + elif suffix.startswith('.p'): | ||
| 59 | + self.assertEqual(value_dict['appname'], | ||
| 60 | + b'Microsoft Office PowerPoint') | ||
| 61 | + else: | ||
| 62 | + self.fail('Unexpected suffix {0} from app {1}' | ||
| 63 | + .format(suffix, value_dict['appname'])) | ||
| 64 | + | ||
| 65 | + if 'utf_16le-korean' in filename: | ||
| 66 | + self.assertEqual(value_dict['codepage'], | ||
| 67 | + '949: ANSI/OEM Korean (Unified Hangul Code)') | ||
| 68 | + self.assertEqual(value_dict['author'], | ||
| 69 | + b'\xb1\xe8\xb1\xe2\xc1\xa4;kijeong') | ||
| 70 | + else: | ||
| 71 | + self.assertEqual(value_dict['codepage'], | ||
| 72 | + '1252: ANSI Latin 1; Western European (Windows)') | ||
| 73 | + self.assertIn(value_dict['author'], | ||
| 74 | + (b'user', b'schulung', | ||
| 75 | + b'xxxxxxxxxxxx', b'zzzzzzzzzzzz')) | ||
| 76 | + | ||
| 77 | + def test_encrypted(self): | ||
| 78 | + """Test indicator "encrypted".""" | ||
| 79 | + for filename, value_dict in self.oleids: | ||
| 80 | + # print('Debugging: testing file {0}'.format(filename)) | ||
| 81 | + self.assertEqual(value_dict['encrypted'], 'encrypted' in filename) | ||
| 82 | + | ||
| 83 | + def test_external_rels(self): | ||
| 84 | + """Test indicator for external relationships.""" | ||
| 85 | + for filename, value_dict in self.oleids: | ||
| 86 | + # print('Debugging: testing file {0}'.format(filename)) | ||
| 87 | + self.assertEqual(value_dict['ext_rels'], | ||
| 88 | + '/external_link/' in filename) | ||
| 89 | + | ||
| 90 | + def test_objectpool(self): | ||
| 91 | + """Test indicator for ObjectPool stream in ole files.""" | ||
| 92 | + for filename, value_dict in self.oleids: | ||
| 93 | + # print('Debugging: testing file {0}'.format(filename)) | ||
| 94 | + if (filename.startswith('oleobj/sample_with_') | ||
| 95 | + or filename.startswith('oleobj/embedded')) \ | ||
| 96 | + and (filename.endswith('.doc') | ||
| 97 | + or filename.endswith('.dot')): | ||
| 98 | + self.assertTrue(value_dict['ObjectPool']) | ||
| 99 | + else: | ||
| 100 | + self.assertFalse(value_dict['ObjectPool']) | ||
| 101 | + | ||
| 102 | + def test_macros(self): | ||
| 103 | + """Test indicator for macros.""" | ||
| 104 | + for filename, value_dict in self.oleids: | ||
| 105 | + # TODO: we need a sample file with xlm macros | ||
| 106 | + before_dot, suffix = splitext(filename) | ||
| 107 | + if suffix == '.zip': | ||
| 108 | + suffix = splitext(before_dot)[1] | ||
| 109 | + # print('Debugging: {1}, {2} for {0}' | ||
| 110 | + # .format(filename, value_dict['vba'], value_dict['xlm'])) | ||
| 111 | + | ||
| 112 | + # xlm detection does not work in-memory (yet) | ||
| 113 | + # --> xlm is "unknown" for excel files, except some encrypted files | ||
| 114 | + self.assertIn(value_dict['xlm'], ('Unknown', 'No')) | ||
| 115 | + | ||
| 116 | + # "macro detection" in text files leads to interesting results: | ||
| 117 | + if filename in ('ooxml/dde-in-excel2003.xml', # not really | ||
| 118 | + 'encrypted/autostart-encrypt-standardpassword.xls', | ||
| 119 | + 'msodde/dde-in-csv.csv', # "Windows" "calc.exe" | ||
| 120 | + 'msodde/dde-in-excel2003.xml', # same as above | ||
| 121 | + 'oleform/oleform-PR314.docm', | ||
| 122 | + 'basic/empty', # WTF? | ||
| 123 | + 'basic/text'): # no macros! | ||
| 124 | + self.assertEqual(value_dict['vba'], 'Yes') | ||
| 125 | + else: | ||
| 126 | + self.assertEqual(value_dict['vba'], 'No') | ||
| 127 | + | ||
| 128 | + def test_flash(self): | ||
| 129 | + """Test indicator for flash.""" | ||
| 130 | + # TODO: add a sample that contains flash | ||
| 131 | + for filename, value_dict in self.oleids: | ||
| 132 | + # print('Debugging: testing file {0}'.format(filename)) | ||
| 133 | + self.assertEqual(value_dict['flash'], 0) | ||
| 134 | + | ||
| 179 | 135 | ||
| 180 | 136 | ||
| 181 | # just in case somebody calls this file as a script | 137 | # just in case somebody calls this file as a script |