test_basic.py
2.43 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
""" Basic tests for ooxml.py """
import unittest
import os
from os.path import join, splitext
from tests.test_utils import DATA_BASE_DIR
from oletools.thirdparty.olefile import isOleFile
from oletools import ooxml
class TestOOXML(unittest.TestCase):
""" Tests correct detection of doc type """
DO_DEBUG = False
def test_all_rough(self):
"""Checks all samples, expect either ole files or good ooxml output"""
# map from extension to expected doctype
ext2doc = dict(
docx=ooxml.DOCTYPE_WORD, docm=ooxml.DOCTYPE_WORD,
xml=(ooxml.DOCTYPE_EXCEL_XML, ooxml.DOCTYPE_WORD_XML),
xlsx=ooxml.DOCTYPE_EXCEL, xlsm=ooxml.DOCTYPE_EXCEL,
xlsb=ooxml.DOCTYPE_EXCEL,
pptx=ooxml.DOCTYPE_POWERPOINT, pptm=ooxml.DOCTYPE_POWERPOINT,
)
# files that are neither OLE nor xml:
except_files = 'empty', 'text'
except_extns = 'rtf'
# analyse all files in data dir
for base_dir, _, files in os.walk(DATA_BASE_DIR):
for filename in files:
if filename in except_files:
if self.DO_DEBUG:
print('skip file: ' + filename)
continue
extn = splitext(filename)[1]
if extn:
extn = extn[1:] # remove the dot
if extn in except_extns:
if self.DO_DEBUG:
print('skip extn: ' + filename)
continue
full_name = join(base_dir, filename)
if isOleFile(full_name):
if self.DO_DEBUG:
print('skip ole: ' + filename)
continue
acceptable = ext2doc[extn]
if not isinstance(acceptable, tuple):
acceptable = (acceptable, )
try:
doctype = ooxml.get_type(full_name)
except Exception:
self.fail('Failed to get doctype of {0}'.format(filename))
self.assertTrue(doctype in acceptable,
msg='Doctype "{0}" for {1} not acceptable'
.format(doctype, full_name))
if self.DO_DEBUG:
print('ok: {0} --> {1}'.format(filename, doctype))
# just in case somebody calls this file as a script
if __name__ == '__main__':
unittest.main()