Commit 3d78a7a2c26b9fc2cc524bd2e807765e782dd96a
1 parent
04d079fc
tests: Create unittests for ftguess
Showing
2 changed files
with
97 additions
and
0 deletions
tests/ftguess/__init__.py
0 → 100644
tests/ftguess/test_basic.py
0 → 100644
| 1 | +"""Test ftguess""" | |
| 2 | + | |
| 3 | +import unittest | |
| 4 | +import os | |
| 5 | +from os.path import splitext | |
| 6 | +from oletools import ftguess | |
| 7 | + | |
| 8 | +# Directory with test data, independent of current working directory | |
| 9 | +from tests.test_utils import DATA_BASE_DIR | |
| 10 | +from tests.test_utils.testdata_reader import loop_over_files | |
| 11 | + | |
| 12 | + | |
| 13 | +class TestFTGuess(unittest.TestCase): | |
| 14 | + """Test ftguess""" | |
| 15 | + | |
| 16 | + def test_all(self): | |
| 17 | + """Run all files in test-data and compare to known output""" | |
| 18 | + # ftguess knows extension for each FType, create a reverse mapping | |
| 19 | + used_types = ( | |
| 20 | + ftguess.FType_RTF, ftguess.FType_Generic_OLE, | |
| 21 | + ftguess.FType_Generic_Zip, ftguess.FType_Word97, | |
| 22 | + ftguess.FType_Word2007, ftguess.FType_Word2007_Macro, | |
| 23 | + ftguess.FType_Word2007_Template, | |
| 24 | + ftguess.FType_Word2007_Template_Macro, ftguess.FType_Excel97, | |
| 25 | + ftguess.FType_Excel2007, | |
| 26 | + ftguess.FType_Excel2007_XLSX , ftguess.FType_Excel2007_XLSM , | |
| 27 | + ftguess.FType_Excel2007_Template, | |
| 28 | + ftguess.FType_Excel2007_Template_Macro, | |
| 29 | + ftguess.FType_Excel2007_Addin_Macro, ftguess.FType_Powerpoint97, | |
| 30 | + ftguess.FType_Powerpoint2007_Presentation, | |
| 31 | + ftguess.FType_Powerpoint2007_Slideshow, | |
| 32 | + ftguess.FType_Powerpoint2007_Macro, | |
| 33 | + ftguess.FType_Powerpoint2007_Slideshow_Macro, | |
| 34 | + ftguess.FType_XPS, | |
| 35 | + ) | |
| 36 | + ftype_for_extension = dict() | |
| 37 | + for ftype in used_types: | |
| 38 | + for extension in ftype.extensions: | |
| 39 | + ftype_for_extension[extension] = ftype | |
| 40 | + | |
| 41 | + # TODO: xlsb is not implemented yet | |
| 42 | + ftype_for_extension['xlsb'] = ftguess.FType_Generic_OpenXML | |
| 43 | + | |
| 44 | + for filename, file_contents in loop_over_files(): | |
| 45 | + # let the system guess | |
| 46 | + guess = ftguess.ftype_guess(data=file_contents) | |
| 47 | + #print(f'for debugging: {filename} --> {guess}') | |
| 48 | + | |
| 49 | + # determine what we expect... | |
| 50 | + before_dot, extension = splitext(filename) | |
| 51 | + if extension == '.zip': | |
| 52 | + extension = splitext(before_dot)[1] | |
| 53 | + elif filename in ('basic/empty', 'basic/text'): | |
| 54 | + extension = '.csv' # samples without extension: treat like csv, i.e. expect FType_Unknown | |
| 55 | + elif not extension: | |
| 56 | + self.fail('Could not find extension for test sample {0}' | |
| 57 | + .format(filename)) | |
| 58 | + extension = extension[1:] # remove the leading '.' | |
| 59 | + | |
| 60 | + # encrypted files are mostly not recognized (yet?), except .xls | |
| 61 | + if filename.startswith('encrypted/'): | |
| 62 | + if extension == 'xls': | |
| 63 | + expect = ftguess.FType_Excel97 | |
| 64 | + else: | |
| 65 | + expect = ftguess.FType_Generic_OLE | |
| 66 | + | |
| 67 | + elif extension in ('xml', 'csv', 'odt', 'ods', 'odp', 'potx', 'potm'): | |
| 68 | + # not really an office file type | |
| 69 | + expect = ftguess.FType_Unknown | |
| 70 | + | |
| 71 | + elif filename == 'basic/encrypted.docx': | |
| 72 | + expect = ftguess.FType_Generic_OLE | |
| 73 | + | |
| 74 | + else: | |
| 75 | + # other files behave nicely, so extension determines the type | |
| 76 | + expect = ftype_for_extension[extension] | |
| 77 | + | |
| 78 | + self.assertEqual(guess.container, expect.container, | |
| 79 | + msg='ftguess guessed container {0} for {1} ' | |
| 80 | + 'but we expected {2}' | |
| 81 | + .format(guess.container, filename, | |
| 82 | + expect.container)) | |
| 83 | + self.assertEqual(guess.filetype, expect.filetype, | |
| 84 | + msg='ftguess guessed filetype {0} for {1} ' | |
| 85 | + 'but we expected {2}' | |
| 86 | + .format(guess.filetype, filename, | |
| 87 | + expect.filetype)) | |
| 88 | + self.assertEqual(guess.application, expect.application, | |
| 89 | + msg='ftguess guessed application {0} for {1} ' | |
| 90 | + 'but we expected {2}' | |
| 91 | + .format(guess.application, filename, | |
| 92 | + expect.application)) | |
| 93 | + | |
| 94 | + | |
| 95 | +# just in case somebody calls this file as a script | |
| 96 | +if __name__ == '__main__': | |
| 97 | + unittest.main() | ... | ... |