Commit cf1532fcc4ba929f10c06375970ef4e5e5f89d38

Authored by Christian Herdtweck
1 parent f0b17938

tests: Re-create oleid tests

With the creation of ftguess, oleid has been changed a lot; the output
of its .check() method is very different. Instead of just adapting the
existing unit tests, I re-created the complete test, trying to make it
easier to change/extend in the future.
Showing 1 changed file with 118 additions and 162 deletions
tests/oleid/test_basic.py
... ... @@ -8,174 +8,130 @@ import unittest
8 8 import os
9 9 from os.path import join, relpath, splitext
10 10 from oletools import oleid
  11 +from oletools.ftguess import CONTAINER
11 12  
12   -# Directory with test data, independent of current working directory
13   -from tests.test_utils import DATA_BASE_DIR
  13 +from tests.test_utils.testdata_reader import loop_over_files, DATA_BASE_DIR
14 14  
15 15  
16 16 class TestOleIDBasic(unittest.TestCase):
17 17 """Test basic functionality of OleID"""
18 18  
19   - def test_all(self):
20   - """Run all file in test-data through oleid and compare to known ouput"""
21   - # this relies on order of indicators being constant, could relax that
22   - # Also requires that files have the correct suffixes (no rtf in doc)
23   - NON_OLE_SUFFIXES = ('.xml', '.csv', '.rtf', '', '.odt', '.ods', '.odp')
24   - NON_OLE_VALUES = (False, )
25   - WORD = b'Microsoft Office Word'
26   - PPT = b'Microsoft Office PowerPoint'
27   - EXCEL = b'Microsoft Excel'
28   - CRYPT = (True, False, 'unknown', True, False, False, False, False,
29   - False, False, 0)
30   - OLE_VALUES = {
31   - 'oleobj/sample_with_lnk_file.doc': (True, True, WORD, False, True,
32   - False, False, False, False,
33   - True, 0),
34   - 'oleobj/embedded-simple-2007.xlsb': (False,),
35   - 'oleobj/embedded-simple-2007.docm': (False,),
36   - 'oleobj/embedded-simple-2007.xltx': (False,),
37   - 'oleobj/embedded-simple-2007.xlam': (False,),
38   - 'oleobj/embedded-simple-2007.dotm': (False,),
39   - 'oleobj/sample_with_lnk_file.ppt': (True, True, PPT, False, False,
40   - False, False, True, False,
41   - False, 0),
42   - 'oleobj/embedded-simple-2007.xlsx': (False,),
43   - 'oleobj/embedded-simple-2007.xlsm': (False,),
44   - 'oleobj/embedded-simple-2007.ppsx': (False,),
45   - 'oleobj/embedded-simple-2007.pps': (True, True, PPT, False, False,
46   - False, False, True, False,
47   - False, 0),
48   - 'oleobj/embedded-simple-2007.xla': (True, True, EXCEL, False,
49   - False, False, True, False,
50   - False, False, 0),
51   - 'oleobj/sample_with_calc_embedded.doc': (True, True, WORD, False,
52   - True, False, False, False,
53   - False, True, 0),
54   - 'oleobj/embedded-unicode-2007.docx': (False,),
55   - 'oleobj/embedded-unicode.doc': (True, True, WORD, False, True,
56   - False, False, False, False, True,
57   - 0),
58   - 'oleobj/embedded-simple-2007.doc': (True, True, WORD, False, True,
59   - False, False, False, False,
60   - True, 0),
61   - 'oleobj/embedded-simple-2007.xls': (True, True, EXCEL, False,
62   - False, False, True, False,
63   - False, False, 0),
64   - 'oleobj/embedded-simple-2007.dot': (True, True, WORD, False, True,
65   - False, False, False, False,
66   - True, 0),
67   - 'oleobj/sample_with_lnk_to_calc.doc': (True, True, WORD, False,
68   - True, False, False, False,
69   - False, True, 0),
70   - 'oleobj/embedded-simple-2007.ppt': (True, True, PPT, False, False,
71   - False, False, True, False,
72   - False, 0),
73   - 'oleobj/sample_with_lnk_file.pps': (True, True, PPT, False, False,
74   - False, False, True, False,
75   - False, 0),
76   - 'oleobj/embedded-simple-2007.pptx': (False,),
77   - 'oleobj/embedded-simple-2007.ppsm': (False,),
78   - 'oleobj/embedded-simple-2007.dotx': (False,),
79   - 'oleobj/embedded-simple-2007.pptm': (False,),
80   - 'oleobj/embedded-simple-2007.xlt': (True, True, EXCEL, False,
81   - False, False, True, False,
82   - False, False, 0),
83   - 'oleobj/embedded-simple-2007.docx': (False,),
84   - 'oleobj/embedded-simple-2007.potx': (False,),
85   - 'oleobj/embedded-simple-2007.pot': (True, True, PPT, False, False,
86   - False, False, True, False,
87   - False, 0),
88   - 'oleobj/embedded-simple-2007.xltm': (False,),
89   - 'oleobj/embedded-simple-2007.potm': (False,),
90   - 'encrypted/encrypted.xlsx': CRYPT,
91   - 'encrypted/encrypted.docm': CRYPT,
92   - 'encrypted/encrypted.docx': CRYPT,
93   - 'encrypted/encrypted.pptm': CRYPT,
94   - 'encrypted/encrypted.xlsb': CRYPT,
95   - 'encrypted/encrypted.xls': (True, True, EXCEL, True, False, False,
96   - True, False, False, False, 0),
97   - 'encrypted/encrypted.ppt': (True, False, 'unknown', True, False,
98   - False, False, True, False, False, 0),
99   - 'encrypted/encrypted.pptx': CRYPT,
100   - 'encrypted/encrypted.xlsm': CRYPT,
101   - 'encrypted/encrypted.doc': (True, True, WORD, True, True, False,
102   - False, False, False, False, 0),
103   - 'msodde/harmless-clean.docm': (False,),
104   - 'msodde/dde-in-csv.csv': (False,),
105   - 'msodde/dde-test-from-office2013-utf_16le-korean.doc':
106   - (True, True, WORD, False, True, False, False, False, False,
107   - False, 0),
108   - 'msodde/harmless-clean.doc': (True, True, WORD, False, True, False,
109   - False, False, False, False, 0),
110   - 'msodde/dde-test.docm': (False,),
111   - 'msodde/dde-test.xlsb': (False,),
112   - 'msodde/dde-test.xlsm': (False,),
113   - 'msodde/dde-test.docx': (False,),
114   - 'msodde/dde-test.xlsx': (False,),
115   - 'msodde/dde-test-from-office2003.doc': (True, True, WORD, False,
116   - True, False, False, False,
117   - False, False, 0),
118   - 'msodde/dde-test-from-office2016.doc': (True, True, WORD, False,
119   - True, False, False, False,
120   - False, False, 0),
121   - 'msodde/harmless-clean.docx': (False,),
122   - 'oleform/oleform-PR314.docm': (False,),
123   - 'basic/encrypted.docx': CRYPT,
124   - 'oleobj/external_link/sample_with_external_link_to_doc.docx': (False,),
125   - 'oleobj/external_link/sample_with_external_link_to_doc.xlsb': (False,),
126   - 'oleobj/external_link/sample_with_external_link_to_doc.dotm': (False,),
127   - 'oleobj/external_link/sample_with_external_link_to_doc.xlsm': (False,),
128   - 'oleobj/external_link/sample_with_external_link_to_doc.pptx': (False,),
129   - 'oleobj/external_link/sample_with_external_link_to_doc.dotx': (False,),
130   - 'oleobj/external_link/sample_with_external_link_to_doc.docm': (False,),
131   - 'oleobj/external_link/sample_with_external_link_to_doc.potm': (False,),
132   - 'oleobj/external_link/sample_with_external_link_to_doc.xlsx': (False,),
133   - 'oleobj/external_link/sample_with_external_link_to_doc.potx': (False,),
134   - 'oleobj/external_link/sample_with_external_link_to_doc.ppsm': (False,),
135   - 'oleobj/external_link/sample_with_external_link_to_doc.pptm': (False,),
136   - 'oleobj/external_link/sample_with_external_link_to_doc.ppsx': (False,),
137   - 'encrypted/autostart-encrypt-standardpassword.xlsm':
138   - (True, False, 'unknown', True, False, False, False, False, False, False, 0),
139   - 'encrypted/autostart-encrypt-standardpassword.xls':
140   - (True, True, EXCEL, True, False, True, True, False, False, False, 0),
141   - 'encrypted/dde-test-encrypt-standardpassword.xlsx':
142   - (True, False, 'unknown', True, False, False, False, False, False, False, 0),
143   - 'encrypted/dde-test-encrypt-standardpassword.xlsm':
144   - (True, False, 'unknown', True, False, False, False, False, False, False, 0),
145   - 'encrypted/autostart-encrypt-standardpassword.xlsb':
146   - (True, False, 'unknown', True, False, False, False, False, False, False, 0),
147   - 'encrypted/dde-test-encrypt-standardpassword.xls':
148   - (True, True, EXCEL, True, False, False, True, False, False, False, 0),
149   - 'encrypted/dde-test-encrypt-standardpassword.xlsb':
150   - (True, False, 'unknown', True, False, False, False, False, False, False, 0),
151   - }
152   -
153   - indicator_names = []
154   - for base_dir, _, files in os.walk(DATA_BASE_DIR):
155   - for filename in files:
156   - full_path = join(base_dir, filename)
157   - name = relpath(full_path, DATA_BASE_DIR)
158   - values = tuple(indicator.value for indicator in
159   - oleid.OleID(full_path).check())
160   - if len(indicator_names) < 2: # not initialized with ole yet
161   - indicator_names = tuple(indicator.name for indicator in
162   - oleid.OleID(full_path).check())
163   - suffix = splitext(filename)[1]
164   - if suffix in NON_OLE_SUFFIXES:
165   - self.assertEqual(values, NON_OLE_VALUES,
166   - msg='For non-ole file {} expected {}, '
167   - 'not {}'.format(name, NON_OLE_VALUES,
168   - values))
169   - continue
170   - try:
171   - self.assertEqual(values, OLE_VALUES[name],
172   - msg='Wrong detail values for {}:\n'
173   - ' Names {}\n Found {}\n Expect {}'
174   - .format(name, indicator_names, values,
175   - OLE_VALUES[name]))
176   - except KeyError:
177   - print('Should add oleid output for {} to {} ({})'
178   - .format(name, __name__, values))
  19 + def setUp(self):
  20 + """Called before tests; populates self.oleids"""
  21 + self.oleids = []
  22 + for filename, file_contents in loop_over_files():
  23 + curr_id = oleid.OleID(filename=filename, data=file_contents)
  24 + value_dict = dict((ind.id, ind.value) for ind in curr_id.check())
  25 + self.oleids.append((filename, value_dict))
  26 +
  27 + # note: indicators "ftype" and "container" are from ftguess,
  28 + # so tested there, already
  29 +
  30 + def test_properties(self):
  31 + """Test indicators "appname", "codepage" and "author" of ole files."""
  32 + for filename, value_dict in self.oleids:
  33 + # print('Debugging: testing file {0}'.format(filename))
  34 + if value_dict['container'] != CONTAINER.OLE:
  35 + self.assertNotIn('appname', value_dict)
  36 + self.assertNotIn('codepage', value_dict)
  37 + self.assertNotIn('author', value_dict)
  38 + continue
  39 +
  40 + before_dot, suffix = splitext(filename)
  41 + if suffix == '.zip':
  42 + suffix = splitext(before_dot)[1]
  43 +
  44 + if 'encrypted' in filename \
  45 + and suffix != '.xls' and suffix != '.doc':
  46 + self.assertEqual(value_dict['appname'], None)
  47 + self.assertEqual(value_dict['codepage'], None)
  48 + self.assertEqual(value_dict['author'], None)
  49 + continue
  50 +
  51 + if suffix.startswith('.d'):
  52 + self.assertEqual(value_dict['appname'],
  53 + b'Microsoft Office Word')
  54 + elif suffix.startswith('.x'):
  55 + self.assertIn(value_dict['appname'],
  56 + (b'Microsoft Office Excel', b'Microsoft Excel'))
  57 + # old types have no "Office" in the app name
  58 + elif suffix.startswith('.p'):
  59 + self.assertEqual(value_dict['appname'],
  60 + b'Microsoft Office PowerPoint')
  61 + else:
  62 + self.fail('Unexpected suffix {0} from app {1}'
  63 + .format(suffix, value_dict['appname']))
  64 +
  65 + if 'utf_16le-korean' in filename:
  66 + self.assertEqual(value_dict['codepage'],
  67 + '949: ANSI/OEM Korean (Unified Hangul Code)')
  68 + self.assertEqual(value_dict['author'],
  69 + b'\xb1\xe8\xb1\xe2\xc1\xa4;kijeong')
  70 + else:
  71 + self.assertEqual(value_dict['codepage'],
  72 + '1252: ANSI Latin 1; Western European (Windows)')
  73 + self.assertIn(value_dict['author'],
  74 + (b'user', b'schulung',
  75 + b'xxxxxxxxxxxx', b'zzzzzzzzzzzz'))
  76 +
  77 + def test_encrypted(self):
  78 + """Test indicator "encrypted"."""
  79 + for filename, value_dict in self.oleids:
  80 + # print('Debugging: testing file {0}'.format(filename))
  81 + self.assertEqual(value_dict['encrypted'], 'encrypted' in filename)
  82 +
  83 + def test_external_rels(self):
  84 + """Test indicator for external relationships."""
  85 + for filename, value_dict in self.oleids:
  86 + # print('Debugging: testing file {0}'.format(filename))
  87 + self.assertEqual(value_dict['ext_rels'],
  88 + '/external_link/' in filename)
  89 +
  90 + def test_objectpool(self):
  91 + """Test indicator for ObjectPool stream in ole files."""
  92 + for filename, value_dict in self.oleids:
  93 + # print('Debugging: testing file {0}'.format(filename))
  94 + if (filename.startswith('oleobj/sample_with_')
  95 + or filename.startswith('oleobj/embedded')) \
  96 + and (filename.endswith('.doc')
  97 + or filename.endswith('.dot')):
  98 + self.assertTrue(value_dict['ObjectPool'])
  99 + else:
  100 + self.assertFalse(value_dict['ObjectPool'])
  101 +
  102 + def test_macros(self):
  103 + """Test indicator for macros."""
  104 + for filename, value_dict in self.oleids:
  105 + # TODO: we need a sample file with xlm macros
  106 + before_dot, suffix = splitext(filename)
  107 + if suffix == '.zip':
  108 + suffix = splitext(before_dot)[1]
  109 + # print('Debugging: {1}, {2} for {0}'
  110 + # .format(filename, value_dict['vba'], value_dict['xlm']))
  111 +
  112 + # xlm detection does not work in-memory (yet)
  113 + # --> xlm is "unknown" for excel files, except some encrypted files
  114 + self.assertIn(value_dict['xlm'], ('Unknown', 'No'))
  115 +
  116 + # "macro detection" in text files leads to interesting results:
  117 + if filename in ('ooxml/dde-in-excel2003.xml', # not really
  118 + 'encrypted/autostart-encrypt-standardpassword.xls',
  119 + 'msodde/dde-in-csv.csv', # "Windows" "calc.exe"
  120 + 'msodde/dde-in-excel2003.xml', # same as above
  121 + 'oleform/oleform-PR314.docm',
  122 + 'basic/empty', # WTF?
  123 + 'basic/text'): # no macros!
  124 + self.assertEqual(value_dict['vba'], 'Yes')
  125 + else:
  126 + self.assertEqual(value_dict['vba'], 'No')
  127 +
  128 + def test_flash(self):
  129 + """Test indicator for flash."""
  130 + # TODO: add a sample that contains flash
  131 + for filename, value_dict in self.oleids:
  132 + # print('Debugging: testing file {0}'.format(filename))
  133 + self.assertEqual(value_dict['flash'], 0)
  134 +
179 135  
180 136  
181 137 # just in case somebody calls this file as a script
... ...