Commit 5c5fccf94b4336bb9d19d29a4a09531eaef73580

Authored by Christian Herdtweck
1 parent 924a6f34

unittests: add more sample files for oleobj; test non-streamed

The pre-read test found a bug in oleobj for zipped-xml files. This will be
fixed in the next commit.
tests/oleobj/test_basic.py
@@ -16,6 +16,35 @@ from oletools import oleobj @@ -16,6 +16,35 @@ from oletools import oleobj
16 DEBUG = False 16 DEBUG = False
17 17
18 18
  19 +# test samples in test-data/oleobj: filename, embedded file name, embedded md5
  20 +SAMPLES = (
  21 + ('sample_with_calc_embedded.doc', 'calc.exe',
  22 + '40e85286357723f326980a3b30f84e4f'),
  23 + ('sample_with_lnk_file.doc', 'calc.lnk',
  24 + '6aedb1a876d4ad5236f1fbbbeb7274f3'),
  25 + ('sample_with_lnk_file.pps', 'calc.lnk',
  26 + '6aedb1a876d4ad5236f1fbbbeb7274f3'),
  27 + ('sample_with_lnk_file.ppt', 'calc.lnk',
  28 + '6aedb1a876d4ad5236f1fbbbeb7274f3'),
  29 + ('embedded-unicode.doc', '_nic_de-___________.txt',
  30 + '264397735b6f09039ba0adf0dc9fb942'),
  31 + ('embedded-unicode-2007.docx', '_nic_de-___________.txt',
  32 + '264397735b6f09039ba0adf0dc9fb942'),
  33 +)
  34 +SAMPLES += tuple(
  35 + ('embedded-simple-2007.' + extn, 'simple-text-file.txt',
  36 + 'bd5c063a5a43f67b3c50dc7b0f1195af')
  37 + for extn in ('doc', 'dot', 'docx', 'docm', 'dotx', 'dotm')
  38 +)
  39 +SAMPLES += tuple(
  40 + ('embedded-simple-2007.' + extn, 'simple-text-file.txt',
  41 + 'ab8c65e4c0fc51739aa66ca5888265b4')
  42 + for extn in ('xls', 'xlsx', 'xlsb', 'xlsm', 'xla', 'xlam', 'xlt', 'xltm',
  43 + 'xltx', 'ppt', 'pptx', 'pptm', 'pps', 'ppsx', 'ppsm', 'pot',
  44 + 'potx', 'potm')
  45 +)
  46 +
  47 +
19 def calc_md5(filename): 48 def calc_md5(filename):
20 """ calc md5sum of given file in temp_dir """ 49 """ calc md5sum of given file in temp_dir """
21 chunk_size = 4096 50 chunk_size = 4096
@@ -28,6 +57,21 @@ def calc_md5(filename): @@ -28,6 +57,21 @@ def calc_md5(filename):
28 return hasher.hexdigest() 57 return hasher.hexdigest()
29 58
30 59
  60 +def preread_file(args):
  61 + """helper for TestOleObj.test_non_streamed: preread + call process_file"""
  62 + ignore_arg, output_dir, filename = args
  63 + if ignore_arg != '-d':
  64 + raise ValueError('ignore_arg not as expected!')
  65 + with open(filename, 'rb') as file_handle:
  66 + data = file_handle.read()
  67 + err_stream, err_dumping, did_dump = \
  68 + oleobj.process_file(filename, data, output_dir=output_dir)
  69 + if did_dump and not err_stream and not err_dumping:
  70 + return oleobj.RETURN_DID_DUMP
  71 + else:
  72 + return oleobj.RETURN_NO_DUMP # just anything else
  73 +
  74 +
31 class TestOleObj(unittest.TestCase): 75 class TestOleObj(unittest.TestCase):
32 """ Tests oleobj basic feature """ 76 """ Tests oleobj basic feature """
33 77
@@ -35,6 +79,10 @@ class TestOleObj(unittest.TestCase): @@ -35,6 +79,10 @@ class TestOleObj(unittest.TestCase):
35 """ fixture start: create temp dir """ 79 """ fixture start: create temp dir """
36 self.temp_dir = mkdtemp(prefix='oletools-oleobj-') 80 self.temp_dir = mkdtemp(prefix='oletools-oleobj-')
37 self.did_fail = False 81 self.did_fail = False
  82 + if DEBUG:
  83 + import logging
  84 + logging.basicConfig(level=logging.DEBUG if DEBUG else logging.INFO)
  85 + oleobj.log.setLevel(logging.NOTSET)
38 86
39 def tearDown(self): 87 def tearDown(self):
40 """ fixture end: remove temp dir """ 88 """ fixture end: remove temp dir """
@@ -67,32 +115,12 @@ class TestOleObj(unittest.TestCase): @@ -67,32 +115,12 @@ class TestOleObj(unittest.TestCase):
67 self.fail('found embedded data in {0}'.format(sample_name)) 115 self.fail('found embedded data in {0}'.format(sample_name))
68 self.assertEqual(ret_val, oleobj.RETURN_NO_DUMP) 116 self.assertEqual(ret_val, oleobj.RETURN_NO_DUMP)
69 117
70 - def do_test_md5(self, args): 118 + def do_test_md5(self, args, test_fun=oleobj.main):
71 """ helper for test_md5 and test_md5_args """ 119 """ helper for test_md5 and test_md5_args """
72 # name of sample, extension of embedded file, md5 hash of embedded file 120 # name of sample, extension of embedded file, md5 hash of embedded file
73 - expected_results = (  
74 - ('sample_with_calc_embedded.doc', 'calc.exe',  
75 - '40e85286357723f326980a3b30f84e4f'),  
76 - ('sample_with_lnk_file.doc', 'calc.lnk',  
77 - '6aedb1a876d4ad5236f1fbbbeb7274f3'),  
78 - ('sample_with_lnk_file.pps', 'calc.lnk',  
79 - '6aedb1a876d4ad5236f1fbbbeb7274f3'),  
80 - ('sample_with_lnk_file.ppt', 'calc.lnk',  
81 - '6aedb1a876d4ad5236f1fbbbeb7274f3'),  
82 - ('embedded-unicode.doc', '_nic_de-___________.txt',  
83 - '264397735b6f09039ba0adf0dc9fb942'),  
84 - ('embedded-unicode-2007.docx', '_nic_de-___________.txt',  
85 - '264397735b6f09039ba0adf0dc9fb942'),  
86 - )  
87 - expected_results += tuple(  
88 - ('embedded-simple-2007.' + extn, 'simple-text-file.txt',  
89 - 'bd5c063a5a43f67b3c50dc7b0f1195af')  
90 - for extn in ('doc', 'dot', 'docx', 'docm', 'dotx', 'dotm')  
91 - )  
92 -  
93 data_dir = join(DATA_BASE_DIR, 'oleobj') 121 data_dir = join(DATA_BASE_DIR, 'oleobj')
94 - for sample_name, embedded_name, expect_hash in expected_results:  
95 - ret_val = oleobj.main(args + [join(data_dir, sample_name), ]) 122 + for sample_name, embedded_name, expect_hash in SAMPLES:
  123 + ret_val = test_fun(args + [join(data_dir, sample_name), ])
96 self.assertEqual(ret_val, oleobj.RETURN_DID_DUMP) 124 self.assertEqual(ret_val, oleobj.RETURN_DID_DUMP)
97 expect_name = join(self.temp_dir, 125 expect_name = join(self.temp_dir,
98 sample_name + '_' + embedded_name) 126 sample_name + '_' + embedded_name)
@@ -108,6 +136,10 @@ class TestOleObj(unittest.TestCase): @@ -108,6 +136,10 @@ class TestOleObj(unittest.TestCase):
108 .format(md5_hash, expect_name, sample_name)) 136 .format(md5_hash, expect_name, sample_name))
109 continue 137 continue
110 138
  139 + def test_non_streamed(self):
  140 + """ Ensure old oleobj behaviour still works: pre-read whole file """
  141 + return self.do_test_md5(['-d', self.temp_dir], test_fun=preread_file)
  142 +
111 143
112 # just in case somebody calls this file as a script 144 # just in case somebody calls this file as a script
113 if __name__ == '__main__': 145 if __name__ == '__main__':
tests/test-data/oleobj/embedded-simple-2007.pot 0 → 100644
No preview for this file type
tests/test-data/oleobj/embedded-simple-2007.potm 0 → 100644
No preview for this file type
tests/test-data/oleobj/embedded-simple-2007.potx 0 → 100644
No preview for this file type
tests/test-data/oleobj/embedded-simple-2007.pps 0 → 100644
No preview for this file type
tests/test-data/oleobj/embedded-simple-2007.ppsm 0 → 100644
No preview for this file type
tests/test-data/oleobj/embedded-simple-2007.ppsx 0 → 100644
No preview for this file type
tests/test-data/oleobj/embedded-simple-2007.ppt 0 → 100644
No preview for this file type
tests/test-data/oleobj/embedded-simple-2007.pptm 0 → 100644
No preview for this file type
tests/test-data/oleobj/embedded-simple-2007.pptx 0 → 100644
No preview for this file type
tests/test-data/oleobj/embedded-simple-2007.xla 0 → 100644
No preview for this file type
tests/test-data/oleobj/embedded-simple-2007.xlam 0 → 100644
No preview for this file type
tests/test-data/oleobj/embedded-simple-2007.xls 0 → 100644
No preview for this file type
tests/test-data/oleobj/embedded-simple-2007.xlsb 0 → 100644
No preview for this file type
tests/test-data/oleobj/embedded-simple-2007.xlsm 0 → 100644
No preview for this file type
tests/test-data/oleobj/embedded-simple-2007.xlsx 0 → 100644
No preview for this file type
tests/test-data/oleobj/embedded-simple-2007.xlt 0 → 100644
No preview for this file type
tests/test-data/oleobj/embedded-simple-2007.xltm 0 → 100644
No preview for this file type
tests/test-data/oleobj/embedded-simple-2007.xltx 0 → 100644
No preview for this file type