diff --git a/tests/oleid/test_basic.py b/tests/oleid/test_basic.py index ce4187a..e37ad61 100644 --- a/tests/oleid/test_basic.py +++ b/tests/oleid/test_basic.py @@ -8,174 +8,130 @@ import unittest import os from os.path import join, relpath, splitext from oletools import oleid +from oletools.ftguess import CONTAINER -# Directory with test data, independent of current working directory -from tests.test_utils import DATA_BASE_DIR +from tests.test_utils.testdata_reader import loop_over_files, DATA_BASE_DIR class TestOleIDBasic(unittest.TestCase): """Test basic functionality of OleID""" - def test_all(self): - """Run all file in test-data through oleid and compare to known ouput""" - # this relies on order of indicators being constant, could relax that - # Also requires that files have the correct suffixes (no rtf in doc) - NON_OLE_SUFFIXES = ('.xml', '.csv', '.rtf', '', '.odt', '.ods', '.odp') - NON_OLE_VALUES = (False, ) - WORD = b'Microsoft Office Word' - PPT = b'Microsoft Office PowerPoint' - EXCEL = b'Microsoft Excel' - CRYPT = (True, False, 'unknown', True, False, False, False, False, - False, False, 0) - OLE_VALUES = { - 'oleobj/sample_with_lnk_file.doc': (True, True, WORD, False, True, - False, False, False, False, - True, 0), - 'oleobj/embedded-simple-2007.xlsb': (False,), - 'oleobj/embedded-simple-2007.docm': (False,), - 'oleobj/embedded-simple-2007.xltx': (False,), - 'oleobj/embedded-simple-2007.xlam': (False,), - 'oleobj/embedded-simple-2007.dotm': (False,), - 'oleobj/sample_with_lnk_file.ppt': (True, True, PPT, False, False, - False, False, True, False, - False, 0), - 'oleobj/embedded-simple-2007.xlsx': (False,), - 'oleobj/embedded-simple-2007.xlsm': (False,), - 'oleobj/embedded-simple-2007.ppsx': (False,), - 'oleobj/embedded-simple-2007.pps': (True, True, PPT, False, False, - False, False, True, False, - False, 0), - 'oleobj/embedded-simple-2007.xla': (True, True, EXCEL, False, - False, False, True, False, - False, False, 0), - 'oleobj/sample_with_calc_embedded.doc': (True, True, WORD, False, - True, False, False, False, - False, True, 0), - 'oleobj/embedded-unicode-2007.docx': (False,), - 'oleobj/embedded-unicode.doc': (True, True, WORD, False, True, - False, False, False, False, True, - 0), - 'oleobj/embedded-simple-2007.doc': (True, True, WORD, False, True, - False, False, False, False, - True, 0), - 'oleobj/embedded-simple-2007.xls': (True, True, EXCEL, False, - False, False, True, False, - False, False, 0), - 'oleobj/embedded-simple-2007.dot': (True, True, WORD, False, True, - False, False, False, False, - True, 0), - 'oleobj/sample_with_lnk_to_calc.doc': (True, True, WORD, False, - True, False, False, False, - False, True, 0), - 'oleobj/embedded-simple-2007.ppt': (True, True, PPT, False, False, - False, False, True, False, - False, 0), - 'oleobj/sample_with_lnk_file.pps': (True, True, PPT, False, False, - False, False, True, False, - False, 0), - 'oleobj/embedded-simple-2007.pptx': (False,), - 'oleobj/embedded-simple-2007.ppsm': (False,), - 'oleobj/embedded-simple-2007.dotx': (False,), - 'oleobj/embedded-simple-2007.pptm': (False,), - 'oleobj/embedded-simple-2007.xlt': (True, True, EXCEL, False, - False, False, True, False, - False, False, 0), - 'oleobj/embedded-simple-2007.docx': (False,), - 'oleobj/embedded-simple-2007.potx': (False,), - 'oleobj/embedded-simple-2007.pot': (True, True, PPT, False, False, - False, False, True, False, - False, 0), - 'oleobj/embedded-simple-2007.xltm': (False,), - 'oleobj/embedded-simple-2007.potm': (False,), - 'encrypted/encrypted.xlsx': CRYPT, - 'encrypted/encrypted.docm': CRYPT, - 'encrypted/encrypted.docx': CRYPT, - 'encrypted/encrypted.pptm': CRYPT, - 'encrypted/encrypted.xlsb': CRYPT, - 'encrypted/encrypted.xls': (True, True, EXCEL, True, False, False, - True, False, False, False, 0), - 'encrypted/encrypted.ppt': (True, False, 'unknown', True, False, - False, False, True, False, False, 0), - 'encrypted/encrypted.pptx': CRYPT, - 'encrypted/encrypted.xlsm': CRYPT, - 'encrypted/encrypted.doc': (True, True, WORD, True, True, False, - False, False, False, False, 0), - 'msodde/harmless-clean.docm': (False,), - 'msodde/dde-in-csv.csv': (False,), - 'msodde/dde-test-from-office2013-utf_16le-korean.doc': - (True, True, WORD, False, True, False, False, False, False, - False, 0), - 'msodde/harmless-clean.doc': (True, True, WORD, False, True, False, - False, False, False, False, 0), - 'msodde/dde-test.docm': (False,), - 'msodde/dde-test.xlsb': (False,), - 'msodde/dde-test.xlsm': (False,), - 'msodde/dde-test.docx': (False,), - 'msodde/dde-test.xlsx': (False,), - 'msodde/dde-test-from-office2003.doc': (True, True, WORD, False, - True, False, False, False, - False, False, 0), - 'msodde/dde-test-from-office2016.doc': (True, True, WORD, False, - True, False, False, False, - False, False, 0), - 'msodde/harmless-clean.docx': (False,), - 'oleform/oleform-PR314.docm': (False,), - 'basic/encrypted.docx': CRYPT, - 'oleobj/external_link/sample_with_external_link_to_doc.docx': (False,), - 'oleobj/external_link/sample_with_external_link_to_doc.xlsb': (False,), - 'oleobj/external_link/sample_with_external_link_to_doc.dotm': (False,), - 'oleobj/external_link/sample_with_external_link_to_doc.xlsm': (False,), - 'oleobj/external_link/sample_with_external_link_to_doc.pptx': (False,), - 'oleobj/external_link/sample_with_external_link_to_doc.dotx': (False,), - 'oleobj/external_link/sample_with_external_link_to_doc.docm': (False,), - 'oleobj/external_link/sample_with_external_link_to_doc.potm': (False,), - 'oleobj/external_link/sample_with_external_link_to_doc.xlsx': (False,), - 'oleobj/external_link/sample_with_external_link_to_doc.potx': (False,), - 'oleobj/external_link/sample_with_external_link_to_doc.ppsm': (False,), - 'oleobj/external_link/sample_with_external_link_to_doc.pptm': (False,), - 'oleobj/external_link/sample_with_external_link_to_doc.ppsx': (False,), - 'encrypted/autostart-encrypt-standardpassword.xlsm': - (True, False, 'unknown', True, False, False, False, False, False, False, 0), - 'encrypted/autostart-encrypt-standardpassword.xls': - (True, True, EXCEL, True, False, True, True, False, False, False, 0), - 'encrypted/dde-test-encrypt-standardpassword.xlsx': - (True, False, 'unknown', True, False, False, False, False, False, False, 0), - 'encrypted/dde-test-encrypt-standardpassword.xlsm': - (True, False, 'unknown', True, False, False, False, False, False, False, 0), - 'encrypted/autostart-encrypt-standardpassword.xlsb': - (True, False, 'unknown', True, False, False, False, False, False, False, 0), - 'encrypted/dde-test-encrypt-standardpassword.xls': - (True, True, EXCEL, True, False, False, True, False, False, False, 0), - 'encrypted/dde-test-encrypt-standardpassword.xlsb': - (True, False, 'unknown', True, False, False, False, False, False, False, 0), - } - - indicator_names = [] - for base_dir, _, files in os.walk(DATA_BASE_DIR): - for filename in files: - full_path = join(base_dir, filename) - name = relpath(full_path, DATA_BASE_DIR) - values = tuple(indicator.value for indicator in - oleid.OleID(full_path).check()) - if len(indicator_names) < 2: # not initialized with ole yet - indicator_names = tuple(indicator.name for indicator in - oleid.OleID(full_path).check()) - suffix = splitext(filename)[1] - if suffix in NON_OLE_SUFFIXES: - self.assertEqual(values, NON_OLE_VALUES, - msg='For non-ole file {} expected {}, ' - 'not {}'.format(name, NON_OLE_VALUES, - values)) - continue - try: - self.assertEqual(values, OLE_VALUES[name], - msg='Wrong detail values for {}:\n' - ' Names {}\n Found {}\n Expect {}' - .format(name, indicator_names, values, - OLE_VALUES[name])) - except KeyError: - print('Should add oleid output for {} to {} ({})' - .format(name, __name__, values)) + def setUp(self): + """Called before tests; populates self.oleids""" + self.oleids = [] + for filename, file_contents in loop_over_files(): + curr_id = oleid.OleID(filename=filename, data=file_contents) + value_dict = dict((ind.id, ind.value) for ind in curr_id.check()) + self.oleids.append((filename, value_dict)) + + # note: indicators "ftype" and "container" are from ftguess, + # so tested there, already + + def test_properties(self): + """Test indicators "appname", "codepage" and "author" of ole files.""" + for filename, value_dict in self.oleids: + # print('Debugging: testing file {0}'.format(filename)) + if value_dict['container'] != CONTAINER.OLE: + self.assertNotIn('appname', value_dict) + self.assertNotIn('codepage', value_dict) + self.assertNotIn('author', value_dict) + continue + + before_dot, suffix = splitext(filename) + if suffix == '.zip': + suffix = splitext(before_dot)[1] + + if 'encrypted' in filename \ + and suffix != '.xls' and suffix != '.doc': + self.assertEqual(value_dict['appname'], None) + self.assertEqual(value_dict['codepage'], None) + self.assertEqual(value_dict['author'], None) + continue + + if suffix.startswith('.d'): + self.assertEqual(value_dict['appname'], + b'Microsoft Office Word') + elif suffix.startswith('.x'): + self.assertIn(value_dict['appname'], + (b'Microsoft Office Excel', b'Microsoft Excel')) + # old types have no "Office" in the app name + elif suffix.startswith('.p'): + self.assertEqual(value_dict['appname'], + b'Microsoft Office PowerPoint') + else: + self.fail('Unexpected suffix {0} from app {1}' + .format(suffix, value_dict['appname'])) + + if 'utf_16le-korean' in filename: + self.assertEqual(value_dict['codepage'], + '949: ANSI/OEM Korean (Unified Hangul Code)') + self.assertEqual(value_dict['author'], + b'\xb1\xe8\xb1\xe2\xc1\xa4;kijeong') + else: + self.assertEqual(value_dict['codepage'], + '1252: ANSI Latin 1; Western European (Windows)') + self.assertIn(value_dict['author'], + (b'user', b'schulung', + b'xxxxxxxxxxxx', b'zzzzzzzzzzzz')) + + def test_encrypted(self): + """Test indicator "encrypted".""" + for filename, value_dict in self.oleids: + # print('Debugging: testing file {0}'.format(filename)) + self.assertEqual(value_dict['encrypted'], 'encrypted' in filename) + + def test_external_rels(self): + """Test indicator for external relationships.""" + for filename, value_dict in self.oleids: + # print('Debugging: testing file {0}'.format(filename)) + self.assertEqual(value_dict['ext_rels'], + '/external_link/' in filename) + + def test_objectpool(self): + """Test indicator for ObjectPool stream in ole files.""" + for filename, value_dict in self.oleids: + # print('Debugging: testing file {0}'.format(filename)) + if (filename.startswith('oleobj/sample_with_') + or filename.startswith('oleobj/embedded')) \ + and (filename.endswith('.doc') + or filename.endswith('.dot')): + self.assertTrue(value_dict['ObjectPool']) + else: + self.assertFalse(value_dict['ObjectPool']) + + def test_macros(self): + """Test indicator for macros.""" + for filename, value_dict in self.oleids: + # TODO: we need a sample file with xlm macros + before_dot, suffix = splitext(filename) + if suffix == '.zip': + suffix = splitext(before_dot)[1] + # print('Debugging: {1}, {2} for {0}' + # .format(filename, value_dict['vba'], value_dict['xlm'])) + + # xlm detection does not work in-memory (yet) + # --> xlm is "unknown" for excel files, except some encrypted files + self.assertIn(value_dict['xlm'], ('Unknown', 'No')) + + # "macro detection" in text files leads to interesting results: + if filename in ('ooxml/dde-in-excel2003.xml', # not really + 'encrypted/autostart-encrypt-standardpassword.xls', + 'msodde/dde-in-csv.csv', # "Windows" "calc.exe" + 'msodde/dde-in-excel2003.xml', # same as above + 'oleform/oleform-PR314.docm', + 'basic/empty', # WTF? + 'basic/text'): # no macros! + self.assertEqual(value_dict['vba'], 'Yes') + else: + self.assertEqual(value_dict['vba'], 'No') + + def test_flash(self): + """Test indicator for flash.""" + # TODO: add a sample that contains flash + for filename, value_dict in self.oleids: + # print('Debugging: testing file {0}'.format(filename)) + self.assertEqual(value_dict['flash'], 0) + # just in case somebody calls this file as a script