Commit 3d78a7a2c26b9fc2cc524bd2e807765e782dd96a
1 parent
04d079fc
tests: Create unittests for ftguess
Showing
2 changed files
with
97 additions
and
0 deletions
tests/ftguess/__init__.py
0 → 100644
tests/ftguess/test_basic.py
0 → 100644
| 1 | +"""Test ftguess""" | ||
| 2 | + | ||
| 3 | +import unittest | ||
| 4 | +import os | ||
| 5 | +from os.path import splitext | ||
| 6 | +from oletools import ftguess | ||
| 7 | + | ||
| 8 | +# Directory with test data, independent of current working directory | ||
| 9 | +from tests.test_utils import DATA_BASE_DIR | ||
| 10 | +from tests.test_utils.testdata_reader import loop_over_files | ||
| 11 | + | ||
| 12 | + | ||
class TestFTGuess(unittest.TestCase):
    """Test ftguess against the samples yielded by loop_over_files()."""

    def test_all(self):
        """Run all files in test-data and compare to known output.

        For every sample the expected file type is derived from the sample's
        name (mostly from its extension). The result of
        ``ftguess.ftype_guess`` is then compared with that expectation in
        three attributes: ``container``, ``filetype`` and ``application``.

        The test fails if a sample has no extension (and is not one of the
        explicitly handled names) or if its extension has no known expected
        type.
        """
        # ftguess knows extension for each FType, create a reverse mapping
        used_types = (
            ftguess.FType_RTF, ftguess.FType_Generic_OLE,
            ftguess.FType_Generic_Zip, ftguess.FType_Word97,
            ftguess.FType_Word2007, ftguess.FType_Word2007_Macro,
            ftguess.FType_Word2007_Template,
            ftguess.FType_Word2007_Template_Macro, ftguess.FType_Excel97,
            ftguess.FType_Excel2007,
            ftguess.FType_Excel2007_XLSX, ftguess.FType_Excel2007_XLSM,
            ftguess.FType_Excel2007_Template,
            ftguess.FType_Excel2007_Template_Macro,
            ftguess.FType_Excel2007_Addin_Macro, ftguess.FType_Powerpoint97,
            ftguess.FType_Powerpoint2007_Presentation,
            ftguess.FType_Powerpoint2007_Slideshow,
            ftguess.FType_Powerpoint2007_Macro,
            ftguess.FType_Powerpoint2007_Slideshow_Macro,
            ftguess.FType_XPS,
        )
        # if several types list the same extension, the one further down in
        # used_types wins (same as assigning in a nested loop)
        ftype_for_extension = {
            extension: ftype
            for ftype in used_types
            for extension in ftype.extensions
        }

        # TODO: xlsb is not implemented yet
        ftype_for_extension['xlsb'] = ftguess.FType_Generic_OpenXML

        for filename, file_contents in loop_over_files():
            # let the system guess
            guess = ftguess.ftype_guess(data=file_contents)
            # print('for debugging: {0} --> {1}'.format(filename, guess))

            # determine what we expect...
            before_dot, extension = splitext(filename)
            if extension == '.zip':
                # name ends in .zip: use the extension in front of it
                extension = splitext(before_dot)[1]
            elif filename in ('basic/empty', 'basic/text'):
                # these samples have no extension; treat them like csv,
                # which is expected to be guessed as FType_Unknown below
                extension = '.csv'
            elif not extension:
                self.fail('Could not find extension for test sample {0}'
                          .format(filename))
            extension = extension[1:]      # remove the leading '.'

            # encrypted files are mostly not recognized (yet?), except .xls
            if filename.startswith('encrypted/'):
                if extension == 'xls':
                    expect = ftguess.FType_Excel97
                else:
                    expect = ftguess.FType_Generic_OLE

            elif extension in ('xml', 'csv', 'odt', 'ods', 'odp', 'potx',
                               'potm'):
                # not really an office file type
                expect = ftguess.FType_Unknown

            elif filename == 'basic/encrypted.docx':
                expect = ftguess.FType_Generic_OLE

            else:
                # other files behave nicely, so extension determines the type
                if extension not in ftype_for_extension:
                    # report which sample is the problem instead of letting
                    # a bare KeyError escape
                    self.fail('No expected file type known for extension '
                              '"{0}" of test sample {1}'
                              .format(extension, filename))
                expect = ftype_for_extension[extension]

            # compare guess and expectation attribute by attribute
            for attribute in ('container', 'filetype', 'application'):
                guessed = getattr(guess, attribute)
                expected = getattr(expect, attribute)
                self.assertEqual(guessed, expected,
                                 msg='ftguess guessed {0} {1} for {2} '
                                     'but we expected {3}'
                                     .format(attribute, guessed, filename,
                                             expected))
| 93 | + | ||
| 94 | + | ||
# allow running this test module directly as a script
if __name__ == '__main__':
    unittest.main()