Commit 3d78a7a2c26b9fc2cc524bd2e807765e782dd96a

Authored by Christian Herdtweck
1 parent 04d079fc

tests: Create unittests for ftguess

tests/ftguess/__init__.py 0 → 100644
tests/ftguess/test_basic.py 0 → 100644
  1 +"""Test ftguess"""
  2 +
  3 +import unittest
  4 +import os
  5 +from os.path import splitext
  6 +from oletools import ftguess
  7 +
  8 +# Directory with test data, independent of current working directory
  9 +from tests.test_utils import DATA_BASE_DIR
  10 +from tests.test_utils.testdata_reader import loop_over_files
  11 +
  12 +
class TestFTGuess(unittest.TestCase):
    """Test ftguess against the sample files in the test-data directory."""

    def test_all(self):
        """Run all files in test-data and compare to known output.

        The expected type of each sample is derived from its file name
        (extension and, for a few special cases, its path) and compared with
        the container, filetype and application that ftguess reports for the
        file contents.
        """
        # ftguess knows extension for each FType, create a reverse mapping
        used_types = (
            ftguess.FType_RTF, ftguess.FType_Generic_OLE,
            ftguess.FType_Generic_Zip, ftguess.FType_Word97,
            ftguess.FType_Word2007, ftguess.FType_Word2007_Macro,
            ftguess.FType_Word2007_Template,
            ftguess.FType_Word2007_Template_Macro, ftguess.FType_Excel97,
            ftguess.FType_Excel2007,
            ftguess.FType_Excel2007_XLSX, ftguess.FType_Excel2007_XLSM,
            ftguess.FType_Excel2007_Template,
            ftguess.FType_Excel2007_Template_Macro,
            ftguess.FType_Excel2007_Addin_Macro, ftguess.FType_Powerpoint97,
            ftguess.FType_Powerpoint2007_Presentation,
            ftguess.FType_Powerpoint2007_Slideshow,
            ftguess.FType_Powerpoint2007_Macro,
            ftguess.FType_Powerpoint2007_Slideshow_Macro,
            ftguess.FType_XPS,
        )
        ftype_for_extension = dict()
        for ftype in used_types:
            for extension in ftype.extensions:
                ftype_for_extension[extension] = ftype

        # TODO: xlsb is not implemented yet
        ftype_for_extension['xlsb'] = ftguess.FType_Generic_OpenXML

        for filename, file_contents in loop_over_files():
            # let the system guess
            guess = ftguess.ftype_guess(data=file_contents)

            # determine what we expect...
            before_dot, extension = splitext(filename)
            if extension == '.zip':
                # zipped sample: the relevant extension is the one in front
                # of the ".zip"
                extension = splitext(before_dot)[1]
            elif filename in ('basic/empty', 'basic/text'):
                # samples without extension; treat them like csv, which is
                # expected to be reported as unknown below
                extension = '.csv'

            # checked after unwrapping ".zip", so a zipped sample without an
            # inner extension is reported clearly instead of causing a
            # KeyError on the empty string further down
            if not extension:
                self.fail('Could not find extension for test sample {0}'
                          .format(filename))
            extension = extension[1:]      # remove the leading '.'

            # encrypted files are mostly not recognized (yet?), except .xls:
            # all others are only expected to be identified as generic OLE
            if filename.startswith('encrypted/'):
                if extension == 'xls':
                    expect = ftguess.FType_Excel97
                else:
                    expect = ftguess.FType_Generic_OLE

            elif extension in ('xml', 'csv', 'odt', 'ods', 'odp',
                               'potx', 'potm'):
                # not really an office file type
                expect = ftguess.FType_Unknown

            elif filename == 'basic/encrypted.docx':
                expect = ftguess.FType_Generic_OLE

            else:
                # other files behave nicely, so extension determines the type
                try:
                    expect = ftype_for_extension[extension]
                except KeyError:
                    # fail with context instead of erroring out with a bare
                    # KeyError when a new kind of sample is added
                    self.fail('No expected file type known for extension '
                              '"{0}" of test sample {1}'
                              .format(extension, filename))

            # compare each aspect of the guess separately, so the failure
            # message names the aspect that differs
            for attribute in ('container', 'filetype', 'application'):
                guessed = getattr(guess, attribute)
                expected = getattr(expect, attribute)
                self.assertEqual(guessed, expected,
                                 msg='ftguess guessed {0} {1} for {2} '
                                     'but we expected {3}'
                                     .format(attribute, guessed, filename,
                                             expected))
  94 +
# Allow running this test module directly as a script.
if __name__ == '__main__':
    unittest.main()
... ...