Commit cf1532fcc4ba929f10c06375970ef4e5e5f89d38
1 parent
f0b17938
tests: Re-create oleid tests
With the creation of ftguess, oleid has changed a lot; in particular, the output of its .check() method is very different. Instead of just adapting the existing unit tests, I re-created the complete test, trying to make it easier to change/extend in the future.
Showing 1 changed file with 118 additions and 162 deletions
tests/oleid/test_basic.py
| ... | ... | @@ -8,174 +8,130 @@ import unittest |
| 8 | 8 | import os |
| 9 | 9 | from os.path import join, relpath, splitext |
| 10 | 10 | from oletools import oleid |
| 11 | +from oletools.ftguess import CONTAINER | |
| 11 | 12 | |
| 12 | -# Directory with test data, independent of current working directory | |
| 13 | -from tests.test_utils import DATA_BASE_DIR | |
| 13 | +from tests.test_utils.testdata_reader import loop_over_files, DATA_BASE_DIR | |
| 14 | 14 | |
| 15 | 15 | |
| 16 | 16 | class TestOleIDBasic(unittest.TestCase): |
| 17 | 17 | """Test basic functionality of OleID""" |
| 18 | 18 | |
| 19 | - def test_all(self): | |
| 20 | - """Run all file in test-data through oleid and compare to known ouput""" | |
| 21 | - # this relies on order of indicators being constant, could relax that | |
| 22 | - # Also requires that files have the correct suffixes (no rtf in doc) | |
| 23 | - NON_OLE_SUFFIXES = ('.xml', '.csv', '.rtf', '', '.odt', '.ods', '.odp') | |
| 24 | - NON_OLE_VALUES = (False, ) | |
| 25 | - WORD = b'Microsoft Office Word' | |
| 26 | - PPT = b'Microsoft Office PowerPoint' | |
| 27 | - EXCEL = b'Microsoft Excel' | |
| 28 | - CRYPT = (True, False, 'unknown', True, False, False, False, False, | |
| 29 | - False, False, 0) | |
| 30 | - OLE_VALUES = { | |
| 31 | - 'oleobj/sample_with_lnk_file.doc': (True, True, WORD, False, True, | |
| 32 | - False, False, False, False, | |
| 33 | - True, 0), | |
| 34 | - 'oleobj/embedded-simple-2007.xlsb': (False,), | |
| 35 | - 'oleobj/embedded-simple-2007.docm': (False,), | |
| 36 | - 'oleobj/embedded-simple-2007.xltx': (False,), | |
| 37 | - 'oleobj/embedded-simple-2007.xlam': (False,), | |
| 38 | - 'oleobj/embedded-simple-2007.dotm': (False,), | |
| 39 | - 'oleobj/sample_with_lnk_file.ppt': (True, True, PPT, False, False, | |
| 40 | - False, False, True, False, | |
| 41 | - False, 0), | |
| 42 | - 'oleobj/embedded-simple-2007.xlsx': (False,), | |
| 43 | - 'oleobj/embedded-simple-2007.xlsm': (False,), | |
| 44 | - 'oleobj/embedded-simple-2007.ppsx': (False,), | |
| 45 | - 'oleobj/embedded-simple-2007.pps': (True, True, PPT, False, False, | |
| 46 | - False, False, True, False, | |
| 47 | - False, 0), | |
| 48 | - 'oleobj/embedded-simple-2007.xla': (True, True, EXCEL, False, | |
| 49 | - False, False, True, False, | |
| 50 | - False, False, 0), | |
| 51 | - 'oleobj/sample_with_calc_embedded.doc': (True, True, WORD, False, | |
| 52 | - True, False, False, False, | |
| 53 | - False, True, 0), | |
| 54 | - 'oleobj/embedded-unicode-2007.docx': (False,), | |
| 55 | - 'oleobj/embedded-unicode.doc': (True, True, WORD, False, True, | |
| 56 | - False, False, False, False, True, | |
| 57 | - 0), | |
| 58 | - 'oleobj/embedded-simple-2007.doc': (True, True, WORD, False, True, | |
| 59 | - False, False, False, False, | |
| 60 | - True, 0), | |
| 61 | - 'oleobj/embedded-simple-2007.xls': (True, True, EXCEL, False, | |
| 62 | - False, False, True, False, | |
| 63 | - False, False, 0), | |
| 64 | - 'oleobj/embedded-simple-2007.dot': (True, True, WORD, False, True, | |
| 65 | - False, False, False, False, | |
| 66 | - True, 0), | |
| 67 | - 'oleobj/sample_with_lnk_to_calc.doc': (True, True, WORD, False, | |
| 68 | - True, False, False, False, | |
| 69 | - False, True, 0), | |
| 70 | - 'oleobj/embedded-simple-2007.ppt': (True, True, PPT, False, False, | |
| 71 | - False, False, True, False, | |
| 72 | - False, 0), | |
| 73 | - 'oleobj/sample_with_lnk_file.pps': (True, True, PPT, False, False, | |
| 74 | - False, False, True, False, | |
| 75 | - False, 0), | |
| 76 | - 'oleobj/embedded-simple-2007.pptx': (False,), | |
| 77 | - 'oleobj/embedded-simple-2007.ppsm': (False,), | |
| 78 | - 'oleobj/embedded-simple-2007.dotx': (False,), | |
| 79 | - 'oleobj/embedded-simple-2007.pptm': (False,), | |
| 80 | - 'oleobj/embedded-simple-2007.xlt': (True, True, EXCEL, False, | |
| 81 | - False, False, True, False, | |
| 82 | - False, False, 0), | |
| 83 | - 'oleobj/embedded-simple-2007.docx': (False,), | |
| 84 | - 'oleobj/embedded-simple-2007.potx': (False,), | |
| 85 | - 'oleobj/embedded-simple-2007.pot': (True, True, PPT, False, False, | |
| 86 | - False, False, True, False, | |
| 87 | - False, 0), | |
| 88 | - 'oleobj/embedded-simple-2007.xltm': (False,), | |
| 89 | - 'oleobj/embedded-simple-2007.potm': (False,), | |
| 90 | - 'encrypted/encrypted.xlsx': CRYPT, | |
| 91 | - 'encrypted/encrypted.docm': CRYPT, | |
| 92 | - 'encrypted/encrypted.docx': CRYPT, | |
| 93 | - 'encrypted/encrypted.pptm': CRYPT, | |
| 94 | - 'encrypted/encrypted.xlsb': CRYPT, | |
| 95 | - 'encrypted/encrypted.xls': (True, True, EXCEL, True, False, False, | |
| 96 | - True, False, False, False, 0), | |
| 97 | - 'encrypted/encrypted.ppt': (True, False, 'unknown', True, False, | |
| 98 | - False, False, True, False, False, 0), | |
| 99 | - 'encrypted/encrypted.pptx': CRYPT, | |
| 100 | - 'encrypted/encrypted.xlsm': CRYPT, | |
| 101 | - 'encrypted/encrypted.doc': (True, True, WORD, True, True, False, | |
| 102 | - False, False, False, False, 0), | |
| 103 | - 'msodde/harmless-clean.docm': (False,), | |
| 104 | - 'msodde/dde-in-csv.csv': (False,), | |
| 105 | - 'msodde/dde-test-from-office2013-utf_16le-korean.doc': | |
| 106 | - (True, True, WORD, False, True, False, False, False, False, | |
| 107 | - False, 0), | |
| 108 | - 'msodde/harmless-clean.doc': (True, True, WORD, False, True, False, | |
| 109 | - False, False, False, False, 0), | |
| 110 | - 'msodde/dde-test.docm': (False,), | |
| 111 | - 'msodde/dde-test.xlsb': (False,), | |
| 112 | - 'msodde/dde-test.xlsm': (False,), | |
| 113 | - 'msodde/dde-test.docx': (False,), | |
| 114 | - 'msodde/dde-test.xlsx': (False,), | |
| 115 | - 'msodde/dde-test-from-office2003.doc': (True, True, WORD, False, | |
| 116 | - True, False, False, False, | |
| 117 | - False, False, 0), | |
| 118 | - 'msodde/dde-test-from-office2016.doc': (True, True, WORD, False, | |
| 119 | - True, False, False, False, | |
| 120 | - False, False, 0), | |
| 121 | - 'msodde/harmless-clean.docx': (False,), | |
| 122 | - 'oleform/oleform-PR314.docm': (False,), | |
| 123 | - 'basic/encrypted.docx': CRYPT, | |
| 124 | - 'oleobj/external_link/sample_with_external_link_to_doc.docx': (False,), | |
| 125 | - 'oleobj/external_link/sample_with_external_link_to_doc.xlsb': (False,), | |
| 126 | - 'oleobj/external_link/sample_with_external_link_to_doc.dotm': (False,), | |
| 127 | - 'oleobj/external_link/sample_with_external_link_to_doc.xlsm': (False,), | |
| 128 | - 'oleobj/external_link/sample_with_external_link_to_doc.pptx': (False,), | |
| 129 | - 'oleobj/external_link/sample_with_external_link_to_doc.dotx': (False,), | |
| 130 | - 'oleobj/external_link/sample_with_external_link_to_doc.docm': (False,), | |
| 131 | - 'oleobj/external_link/sample_with_external_link_to_doc.potm': (False,), | |
| 132 | - 'oleobj/external_link/sample_with_external_link_to_doc.xlsx': (False,), | |
| 133 | - 'oleobj/external_link/sample_with_external_link_to_doc.potx': (False,), | |
| 134 | - 'oleobj/external_link/sample_with_external_link_to_doc.ppsm': (False,), | |
| 135 | - 'oleobj/external_link/sample_with_external_link_to_doc.pptm': (False,), | |
| 136 | - 'oleobj/external_link/sample_with_external_link_to_doc.ppsx': (False,), | |
| 137 | - 'encrypted/autostart-encrypt-standardpassword.xlsm': | |
| 138 | - (True, False, 'unknown', True, False, False, False, False, False, False, 0), | |
| 139 | - 'encrypted/autostart-encrypt-standardpassword.xls': | |
| 140 | - (True, True, EXCEL, True, False, True, True, False, False, False, 0), | |
| 141 | - 'encrypted/dde-test-encrypt-standardpassword.xlsx': | |
| 142 | - (True, False, 'unknown', True, False, False, False, False, False, False, 0), | |
| 143 | - 'encrypted/dde-test-encrypt-standardpassword.xlsm': | |
| 144 | - (True, False, 'unknown', True, False, False, False, False, False, False, 0), | |
| 145 | - 'encrypted/autostart-encrypt-standardpassword.xlsb': | |
| 146 | - (True, False, 'unknown', True, False, False, False, False, False, False, 0), | |
| 147 | - 'encrypted/dde-test-encrypt-standardpassword.xls': | |
| 148 | - (True, True, EXCEL, True, False, False, True, False, False, False, 0), | |
| 149 | - 'encrypted/dde-test-encrypt-standardpassword.xlsb': | |
| 150 | - (True, False, 'unknown', True, False, False, False, False, False, False, 0), | |
| 151 | - } | |
| 152 | - | |
| 153 | - indicator_names = [] | |
| 154 | - for base_dir, _, files in os.walk(DATA_BASE_DIR): | |
| 155 | - for filename in files: | |
| 156 | - full_path = join(base_dir, filename) | |
| 157 | - name = relpath(full_path, DATA_BASE_DIR) | |
| 158 | - values = tuple(indicator.value for indicator in | |
| 159 | - oleid.OleID(full_path).check()) | |
| 160 | - if len(indicator_names) < 2: # not initialized with ole yet | |
| 161 | - indicator_names = tuple(indicator.name for indicator in | |
| 162 | - oleid.OleID(full_path).check()) | |
| 163 | - suffix = splitext(filename)[1] | |
| 164 | - if suffix in NON_OLE_SUFFIXES: | |
| 165 | - self.assertEqual(values, NON_OLE_VALUES, | |
| 166 | - msg='For non-ole file {} expected {}, ' | |
| 167 | - 'not {}'.format(name, NON_OLE_VALUES, | |
| 168 | - values)) | |
| 169 | - continue | |
| 170 | - try: | |
| 171 | - self.assertEqual(values, OLE_VALUES[name], | |
| 172 | - msg='Wrong detail values for {}:\n' | |
| 173 | - ' Names {}\n Found {}\n Expect {}' | |
| 174 | - .format(name, indicator_names, values, | |
| 175 | - OLE_VALUES[name])) | |
| 176 | - except KeyError: | |
| 177 | - print('Should add oleid output for {} to {} ({})' | |
| 178 | - .format(name, __name__, values)) | |
| 19 | + def setUp(self): | |
| 20 | + """Called before tests; populates self.oleids""" | |
| 21 | + self.oleids = [] | |
| 22 | + for filename, file_contents in loop_over_files(): | |
| 23 | + curr_id = oleid.OleID(filename=filename, data=file_contents) | |
| 24 | + value_dict = dict((ind.id, ind.value) for ind in curr_id.check()) | |
| 25 | + self.oleids.append((filename, value_dict)) | |
| 26 | + | |
| 27 | + # note: indicators "ftype" and "container" are from ftguess, | |
| 28 | + # so tested there, already | |
| 29 | + | |
| 30 | + def test_properties(self): | |
| 31 | + """Test indicators "appname", "codepage" and "author" of ole files.""" | |
| 32 | + for filename, value_dict in self.oleids: | |
| 33 | + # print('Debugging: testing file {0}'.format(filename)) | |
| 34 | + if value_dict['container'] != CONTAINER.OLE: | |
| 35 | + self.assertNotIn('appname', value_dict) | |
| 36 | + self.assertNotIn('codepage', value_dict) | |
| 37 | + self.assertNotIn('author', value_dict) | |
| 38 | + continue | |
| 39 | + | |
| 40 | + before_dot, suffix = splitext(filename) | |
| 41 | + if suffix == '.zip': | |
| 42 | + suffix = splitext(before_dot)[1] | |
| 43 | + | |
| 44 | + if 'encrypted' in filename \ | |
| 45 | + and suffix != '.xls' and suffix != '.doc': | |
| 46 | + self.assertEqual(value_dict['appname'], None) | |
| 47 | + self.assertEqual(value_dict['codepage'], None) | |
| 48 | + self.assertEqual(value_dict['author'], None) | |
| 49 | + continue | |
| 50 | + | |
| 51 | + if suffix.startswith('.d'): | |
| 52 | + self.assertEqual(value_dict['appname'], | |
| 53 | + b'Microsoft Office Word') | |
| 54 | + elif suffix.startswith('.x'): | |
| 55 | + self.assertIn(value_dict['appname'], | |
| 56 | + (b'Microsoft Office Excel', b'Microsoft Excel')) | |
| 57 | + # old types have no "Office" in the app name | |
| 58 | + elif suffix.startswith('.p'): | |
| 59 | + self.assertEqual(value_dict['appname'], | |
| 60 | + b'Microsoft Office PowerPoint') | |
| 61 | + else: | |
| 62 | + self.fail('Unexpected suffix {0} from app {1}' | |
| 63 | + .format(suffix, value_dict['appname'])) | |
| 64 | + | |
| 65 | + if 'utf_16le-korean' in filename: | |
| 66 | + self.assertEqual(value_dict['codepage'], | |
| 67 | + '949: ANSI/OEM Korean (Unified Hangul Code)') | |
| 68 | + self.assertEqual(value_dict['author'], | |
| 69 | + b'\xb1\xe8\xb1\xe2\xc1\xa4;kijeong') | |
| 70 | + else: | |
| 71 | + self.assertEqual(value_dict['codepage'], | |
| 72 | + '1252: ANSI Latin 1; Western European (Windows)') | |
| 73 | + self.assertIn(value_dict['author'], | |
| 74 | + (b'user', b'schulung', | |
| 75 | + b'xxxxxxxxxxxx', b'zzzzzzzzzzzz')) | |
| 76 | + | |
| 77 | + def test_encrypted(self): | |
| 78 | + """Test indicator "encrypted".""" | |
| 79 | + for filename, value_dict in self.oleids: | |
| 80 | + # print('Debugging: testing file {0}'.format(filename)) | |
| 81 | + self.assertEqual(value_dict['encrypted'], 'encrypted' in filename) | |
| 82 | + | |
| 83 | + def test_external_rels(self): | |
| 84 | + """Test indicator for external relationships.""" | |
| 85 | + for filename, value_dict in self.oleids: | |
| 86 | + # print('Debugging: testing file {0}'.format(filename)) | |
| 87 | + self.assertEqual(value_dict['ext_rels'], | |
| 88 | + '/external_link/' in filename) | |
| 89 | + | |
| 90 | + def test_objectpool(self): | |
| 91 | + """Test indicator for ObjectPool stream in ole files.""" | |
| 92 | + for filename, value_dict in self.oleids: | |
| 93 | + # print('Debugging: testing file {0}'.format(filename)) | |
| 94 | + if (filename.startswith('oleobj/sample_with_') | |
| 95 | + or filename.startswith('oleobj/embedded')) \ | |
| 96 | + and (filename.endswith('.doc') | |
| 97 | + or filename.endswith('.dot')): | |
| 98 | + self.assertTrue(value_dict['ObjectPool']) | |
| 99 | + else: | |
| 100 | + self.assertFalse(value_dict['ObjectPool']) | |
| 101 | + | |
| 102 | + def test_macros(self): | |
| 103 | + """Test indicator for macros.""" | |
| 104 | + for filename, value_dict in self.oleids: | |
| 105 | + # TODO: we need a sample file with xlm macros | |
| 106 | + before_dot, suffix = splitext(filename) | |
| 107 | + if suffix == '.zip': | |
| 108 | + suffix = splitext(before_dot)[1] | |
| 109 | + # print('Debugging: {1}, {2} for {0}' | |
| 110 | + # .format(filename, value_dict['vba'], value_dict['xlm'])) | |
| 111 | + | |
| 112 | + # xlm detection does not work in-memory (yet) | |
| 113 | + # --> xlm is "unknown" for excel files, except some encrypted files | |
| 114 | + self.assertIn(value_dict['xlm'], ('Unknown', 'No')) | |
| 115 | + | |
| 116 | + # "macro detection" in text files leads to interesting results: | |
| 117 | + if filename in ('ooxml/dde-in-excel2003.xml', # not really | |
| 118 | + 'encrypted/autostart-encrypt-standardpassword.xls', | |
| 119 | + 'msodde/dde-in-csv.csv', # "Windows" "calc.exe" | |
| 120 | + 'msodde/dde-in-excel2003.xml', # same as above | |
| 121 | + 'oleform/oleform-PR314.docm', | |
| 122 | + 'basic/empty', # WTF? | |
| 123 | + 'basic/text'): # no macros! | |
| 124 | + self.assertEqual(value_dict['vba'], 'Yes') | |
| 125 | + else: | |
| 126 | + self.assertEqual(value_dict['vba'], 'No') | |
| 127 | + | |
| 128 | + def test_flash(self): | |
| 129 | + """Test indicator for flash.""" | |
| 130 | + # TODO: add a sample that contains flash | |
| 131 | + for filename, value_dict in self.oleids: | |
| 132 | + # print('Debugging: testing file {0}'.format(filename)) | |
| 133 | + self.assertEqual(value_dict['flash'], 0) | |
| 134 | + | |
| 179 | 135 | |
| 180 | 136 | |
| 181 | 137 | # just in case somebody calls this file as a script | ... | ... |