Commit cf1532fcc4ba929f10c06375970ef4e5e5f89d38

Authored by Christian Herdtweck
1 parent f0b17938

tests: Re-create oleid tests

With the creation of ftguess, oleid has changed a lot, and the output
of its .check() method is very different. Instead of just adapting the
existing unit tests, I re-created the complete test, trying to make it
easier to change and extend in the future.
Showing 1 changed file with 118 additions and 162 deletions
tests/oleid/test_basic.py
@@ -8,174 +8,130 @@ import unittest @@ -8,174 +8,130 @@ import unittest
8 import os 8 import os
9 from os.path import join, relpath, splitext 9 from os.path import join, relpath, splitext
10 from oletools import oleid 10 from oletools import oleid
  11 +from oletools.ftguess import CONTAINER
11 12
12 -# Directory with test data, independent of current working directory  
13 -from tests.test_utils import DATA_BASE_DIR 13 +from tests.test_utils.testdata_reader import loop_over_files, DATA_BASE_DIR
14 14
15 15
16 class TestOleIDBasic(unittest.TestCase): 16 class TestOleIDBasic(unittest.TestCase):
17 """Test basic functionality of OleID""" 17 """Test basic functionality of OleID"""
18 18
19 - def test_all(self):  
20 - """Run all file in test-data through oleid and compare to known ouput"""  
21 - # this relies on order of indicators being constant, could relax that  
22 - # Also requires that files have the correct suffixes (no rtf in doc)  
23 - NON_OLE_SUFFIXES = ('.xml', '.csv', '.rtf', '', '.odt', '.ods', '.odp')  
24 - NON_OLE_VALUES = (False, )  
25 - WORD = b'Microsoft Office Word'  
26 - PPT = b'Microsoft Office PowerPoint'  
27 - EXCEL = b'Microsoft Excel'  
28 - CRYPT = (True, False, 'unknown', True, False, False, False, False,  
29 - False, False, 0)  
30 - OLE_VALUES = {  
31 - 'oleobj/sample_with_lnk_file.doc': (True, True, WORD, False, True,  
32 - False, False, False, False,  
33 - True, 0),  
34 - 'oleobj/embedded-simple-2007.xlsb': (False,),  
35 - 'oleobj/embedded-simple-2007.docm': (False,),  
36 - 'oleobj/embedded-simple-2007.xltx': (False,),  
37 - 'oleobj/embedded-simple-2007.xlam': (False,),  
38 - 'oleobj/embedded-simple-2007.dotm': (False,),  
39 - 'oleobj/sample_with_lnk_file.ppt': (True, True, PPT, False, False,  
40 - False, False, True, False,  
41 - False, 0),  
42 - 'oleobj/embedded-simple-2007.xlsx': (False,),  
43 - 'oleobj/embedded-simple-2007.xlsm': (False,),  
44 - 'oleobj/embedded-simple-2007.ppsx': (False,),  
45 - 'oleobj/embedded-simple-2007.pps': (True, True, PPT, False, False,  
46 - False, False, True, False,  
47 - False, 0),  
48 - 'oleobj/embedded-simple-2007.xla': (True, True, EXCEL, False,  
49 - False, False, True, False,  
50 - False, False, 0),  
51 - 'oleobj/sample_with_calc_embedded.doc': (True, True, WORD, False,  
52 - True, False, False, False,  
53 - False, True, 0),  
54 - 'oleobj/embedded-unicode-2007.docx': (False,),  
55 - 'oleobj/embedded-unicode.doc': (True, True, WORD, False, True,  
56 - False, False, False, False, True,  
57 - 0),  
58 - 'oleobj/embedded-simple-2007.doc': (True, True, WORD, False, True,  
59 - False, False, False, False,  
60 - True, 0),  
61 - 'oleobj/embedded-simple-2007.xls': (True, True, EXCEL, False,  
62 - False, False, True, False,  
63 - False, False, 0),  
64 - 'oleobj/embedded-simple-2007.dot': (True, True, WORD, False, True,  
65 - False, False, False, False,  
66 - True, 0),  
67 - 'oleobj/sample_with_lnk_to_calc.doc': (True, True, WORD, False,  
68 - True, False, False, False,  
69 - False, True, 0),  
70 - 'oleobj/embedded-simple-2007.ppt': (True, True, PPT, False, False,  
71 - False, False, True, False,  
72 - False, 0),  
73 - 'oleobj/sample_with_lnk_file.pps': (True, True, PPT, False, False,  
74 - False, False, True, False,  
75 - False, 0),  
76 - 'oleobj/embedded-simple-2007.pptx': (False,),  
77 - 'oleobj/embedded-simple-2007.ppsm': (False,),  
78 - 'oleobj/embedded-simple-2007.dotx': (False,),  
79 - 'oleobj/embedded-simple-2007.pptm': (False,),  
80 - 'oleobj/embedded-simple-2007.xlt': (True, True, EXCEL, False,  
81 - False, False, True, False,  
82 - False, False, 0),  
83 - 'oleobj/embedded-simple-2007.docx': (False,),  
84 - 'oleobj/embedded-simple-2007.potx': (False,),  
85 - 'oleobj/embedded-simple-2007.pot': (True, True, PPT, False, False,  
86 - False, False, True, False,  
87 - False, 0),  
88 - 'oleobj/embedded-simple-2007.xltm': (False,),  
89 - 'oleobj/embedded-simple-2007.potm': (False,),  
90 - 'encrypted/encrypted.xlsx': CRYPT,  
91 - 'encrypted/encrypted.docm': CRYPT,  
92 - 'encrypted/encrypted.docx': CRYPT,  
93 - 'encrypted/encrypted.pptm': CRYPT,  
94 - 'encrypted/encrypted.xlsb': CRYPT,  
95 - 'encrypted/encrypted.xls': (True, True, EXCEL, True, False, False,  
96 - True, False, False, False, 0),  
97 - 'encrypted/encrypted.ppt': (True, False, 'unknown', True, False,  
98 - False, False, True, False, False, 0),  
99 - 'encrypted/encrypted.pptx': CRYPT,  
100 - 'encrypted/encrypted.xlsm': CRYPT,  
101 - 'encrypted/encrypted.doc': (True, True, WORD, True, True, False,  
102 - False, False, False, False, 0),  
103 - 'msodde/harmless-clean.docm': (False,),  
104 - 'msodde/dde-in-csv.csv': (False,),  
105 - 'msodde/dde-test-from-office2013-utf_16le-korean.doc':  
106 - (True, True, WORD, False, True, False, False, False, False,  
107 - False, 0),  
108 - 'msodde/harmless-clean.doc': (True, True, WORD, False, True, False,  
109 - False, False, False, False, 0),  
110 - 'msodde/dde-test.docm': (False,),  
111 - 'msodde/dde-test.xlsb': (False,),  
112 - 'msodde/dde-test.xlsm': (False,),  
113 - 'msodde/dde-test.docx': (False,),  
114 - 'msodde/dde-test.xlsx': (False,),  
115 - 'msodde/dde-test-from-office2003.doc': (True, True, WORD, False,  
116 - True, False, False, False,  
117 - False, False, 0),  
118 - 'msodde/dde-test-from-office2016.doc': (True, True, WORD, False,  
119 - True, False, False, False,  
120 - False, False, 0),  
121 - 'msodde/harmless-clean.docx': (False,),  
122 - 'oleform/oleform-PR314.docm': (False,),  
123 - 'basic/encrypted.docx': CRYPT,  
124 - 'oleobj/external_link/sample_with_external_link_to_doc.docx': (False,),  
125 - 'oleobj/external_link/sample_with_external_link_to_doc.xlsb': (False,),  
126 - 'oleobj/external_link/sample_with_external_link_to_doc.dotm': (False,),  
127 - 'oleobj/external_link/sample_with_external_link_to_doc.xlsm': (False,),  
128 - 'oleobj/external_link/sample_with_external_link_to_doc.pptx': (False,),  
129 - 'oleobj/external_link/sample_with_external_link_to_doc.dotx': (False,),  
130 - 'oleobj/external_link/sample_with_external_link_to_doc.docm': (False,),  
131 - 'oleobj/external_link/sample_with_external_link_to_doc.potm': (False,),  
132 - 'oleobj/external_link/sample_with_external_link_to_doc.xlsx': (False,),  
133 - 'oleobj/external_link/sample_with_external_link_to_doc.potx': (False,),  
134 - 'oleobj/external_link/sample_with_external_link_to_doc.ppsm': (False,),  
135 - 'oleobj/external_link/sample_with_external_link_to_doc.pptm': (False,),  
136 - 'oleobj/external_link/sample_with_external_link_to_doc.ppsx': (False,),  
137 - 'encrypted/autostart-encrypt-standardpassword.xlsm':  
138 - (True, False, 'unknown', True, False, False, False, False, False, False, 0),  
139 - 'encrypted/autostart-encrypt-standardpassword.xls':  
140 - (True, True, EXCEL, True, False, True, True, False, False, False, 0),  
141 - 'encrypted/dde-test-encrypt-standardpassword.xlsx':  
142 - (True, False, 'unknown', True, False, False, False, False, False, False, 0),  
143 - 'encrypted/dde-test-encrypt-standardpassword.xlsm':  
144 - (True, False, 'unknown', True, False, False, False, False, False, False, 0),  
145 - 'encrypted/autostart-encrypt-standardpassword.xlsb':  
146 - (True, False, 'unknown', True, False, False, False, False, False, False, 0),  
147 - 'encrypted/dde-test-encrypt-standardpassword.xls':  
148 - (True, True, EXCEL, True, False, False, True, False, False, False, 0),  
149 - 'encrypted/dde-test-encrypt-standardpassword.xlsb':  
150 - (True, False, 'unknown', True, False, False, False, False, False, False, 0),  
151 - }  
152 -  
153 - indicator_names = []  
154 - for base_dir, _, files in os.walk(DATA_BASE_DIR):  
155 - for filename in files:  
156 - full_path = join(base_dir, filename)  
157 - name = relpath(full_path, DATA_BASE_DIR)  
158 - values = tuple(indicator.value for indicator in  
159 - oleid.OleID(full_path).check())  
160 - if len(indicator_names) < 2: # not initialized with ole yet  
161 - indicator_names = tuple(indicator.name for indicator in  
162 - oleid.OleID(full_path).check())  
163 - suffix = splitext(filename)[1]  
164 - if suffix in NON_OLE_SUFFIXES:  
165 - self.assertEqual(values, NON_OLE_VALUES,  
166 - msg='For non-ole file {} expected {}, '  
167 - 'not {}'.format(name, NON_OLE_VALUES,  
168 - values))  
169 - continue  
170 - try:  
171 - self.assertEqual(values, OLE_VALUES[name],  
172 - msg='Wrong detail values for {}:\n'  
173 - ' Names {}\n Found {}\n Expect {}'  
174 - .format(name, indicator_names, values,  
175 - OLE_VALUES[name]))  
176 - except KeyError:  
177 - print('Should add oleid output for {} to {} ({})'  
178 - .format(name, __name__, values)) 19 + def setUp(self):
  20 + """Called before tests; populates self.oleids"""
  21 + self.oleids = []
  22 + for filename, file_contents in loop_over_files():
  23 + curr_id = oleid.OleID(filename=filename, data=file_contents)
  24 + value_dict = dict((ind.id, ind.value) for ind in curr_id.check())
  25 + self.oleids.append((filename, value_dict))
  26 +
  27 + # note: indicators "ftype" and "container" are from ftguess,
  28 + # so tested there, already
  29 +
  30 + def test_properties(self):
  31 + """Test indicators "appname", "codepage" and "author" of ole files."""
  32 + for filename, value_dict in self.oleids:
  33 + # print('Debugging: testing file {0}'.format(filename))
  34 + if value_dict['container'] != CONTAINER.OLE:
  35 + self.assertNotIn('appname', value_dict)
  36 + self.assertNotIn('codepage', value_dict)
  37 + self.assertNotIn('author', value_dict)
  38 + continue
  39 +
  40 + before_dot, suffix = splitext(filename)
  41 + if suffix == '.zip':
  42 + suffix = splitext(before_dot)[1]
  43 +
  44 + if 'encrypted' in filename \
  45 + and suffix != '.xls' and suffix != '.doc':
  46 + self.assertEqual(value_dict['appname'], None)
  47 + self.assertEqual(value_dict['codepage'], None)
  48 + self.assertEqual(value_dict['author'], None)
  49 + continue
  50 +
  51 + if suffix.startswith('.d'):
  52 + self.assertEqual(value_dict['appname'],
  53 + b'Microsoft Office Word')
  54 + elif suffix.startswith('.x'):
  55 + self.assertIn(value_dict['appname'],
  56 + (b'Microsoft Office Excel', b'Microsoft Excel'))
  57 + # old types have no "Office" in the app name
  58 + elif suffix.startswith('.p'):
  59 + self.assertEqual(value_dict['appname'],
  60 + b'Microsoft Office PowerPoint')
  61 + else:
  62 + self.fail('Unexpected suffix {0} from app {1}'
  63 + .format(suffix, value_dict['appname']))
  64 +
  65 + if 'utf_16le-korean' in filename:
  66 + self.assertEqual(value_dict['codepage'],
  67 + '949: ANSI/OEM Korean (Unified Hangul Code)')
  68 + self.assertEqual(value_dict['author'],
  69 + b'\xb1\xe8\xb1\xe2\xc1\xa4;kijeong')
  70 + else:
  71 + self.assertEqual(value_dict['codepage'],
  72 + '1252: ANSI Latin 1; Western European (Windows)')
  73 + self.assertIn(value_dict['author'],
  74 + (b'user', b'schulung',
  75 + b'xxxxxxxxxxxx', b'zzzzzzzzzzzz'))
  76 +
  77 + def test_encrypted(self):
  78 + """Test indicator "encrypted"."""
  79 + for filename, value_dict in self.oleids:
  80 + # print('Debugging: testing file {0}'.format(filename))
  81 + self.assertEqual(value_dict['encrypted'], 'encrypted' in filename)
  82 +
  83 + def test_external_rels(self):
  84 + """Test indicator for external relationships."""
  85 + for filename, value_dict in self.oleids:
  86 + # print('Debugging: testing file {0}'.format(filename))
  87 + self.assertEqual(value_dict['ext_rels'],
  88 + '/external_link/' in filename)
  89 +
  90 + def test_objectpool(self):
  91 + """Test indicator for ObjectPool stream in ole files."""
  92 + for filename, value_dict in self.oleids:
  93 + # print('Debugging: testing file {0}'.format(filename))
  94 + if (filename.startswith('oleobj/sample_with_')
  95 + or filename.startswith('oleobj/embedded')) \
  96 + and (filename.endswith('.doc')
  97 + or filename.endswith('.dot')):
  98 + self.assertTrue(value_dict['ObjectPool'])
  99 + else:
  100 + self.assertFalse(value_dict['ObjectPool'])
  101 +
  102 + def test_macros(self):
  103 + """Test indicator for macros."""
  104 + for filename, value_dict in self.oleids:
  105 + # TODO: we need a sample file with xlm macros
  106 + before_dot, suffix = splitext(filename)
  107 + if suffix == '.zip':
  108 + suffix = splitext(before_dot)[1]
  109 + # print('Debugging: {1}, {2} for {0}'
  110 + # .format(filename, value_dict['vba'], value_dict['xlm']))
  111 +
  112 + # xlm detection does not work in-memory (yet)
  113 + # --> xlm is "unknown" for excel files, except some encrypted files
  114 + self.assertIn(value_dict['xlm'], ('Unknown', 'No'))
  115 +
  116 + # "macro detection" in text files leads to interesting results:
  117 + if filename in ('ooxml/dde-in-excel2003.xml', # not really
  118 + 'encrypted/autostart-encrypt-standardpassword.xls',
  119 + 'msodde/dde-in-csv.csv', # "Windows" "calc.exe"
  120 + 'msodde/dde-in-excel2003.xml', # same as above
  121 + 'oleform/oleform-PR314.docm',
  122 + 'basic/empty', # WTF?
  123 + 'basic/text'): # no macros!
  124 + self.assertEqual(value_dict['vba'], 'Yes')
  125 + else:
  126 + self.assertEqual(value_dict['vba'], 'No')
  127 +
  128 + def test_flash(self):
  129 + """Test indicator for flash."""
  130 + # TODO: add a sample that contains flash
  131 + for filename, value_dict in self.oleids:
  132 + # print('Debugging: testing file {0}'.format(filename))
  133 + self.assertEqual(value_dict['flash'], 0)
  134 +
179 135
180 136
181 # just in case somebody calls this file as a script 137 # just in case somebody calls this file as a script