diff --git a/oletools/olevba.py b/oletools/olevba.py index 67d42ca..6edbb5e 100644 --- a/oletools/olevba.py +++ b/oletools/olevba.py @@ -2024,19 +2024,19 @@ def json2ascii(json_obj, encoding='utf8', errors='replace'): return json_obj -_have_printed_json_start = False - -def print_json(json_dict=None, _json_is_last=False, **json_parts): +def print_json(json_dict=None, _json_is_first=False, _json_is_last=False, + **json_parts): """ line-wise print of json.dumps(json2ascii(..)) with options and indent+1 can use in two ways: (1) print_json(some_dict) (2) print_json(key1=value1, key2=value2, ...) + :param bool _json_is_first: set to True only for very first entry to complete + the top-level json-list :param bool _json_is_last: set to True only for very last entry to complete the top-level json-list """ - global _have_printed_json_start if json_dict and json_parts: raise ValueError('Invalid json argument: want either single dict or ' @@ -2048,9 +2048,8 @@ def print_json(json_dict=None, _json_is_last=False, **json_parts): if json_parts: json_dict = json_parts - if not _have_printed_json_start: + if _json_is_first: print('[') - _have_printed_json_start = True lines = json.dumps(json2ascii(json_dict), check_circular=False, indent=4, ensure_ascii=False).splitlines() @@ -3269,10 +3268,9 @@ class VBA_Parser_CLI(VBA_Parser): #=== MAIN ===================================================================== -def main(): - """ - Main function, called when olevba is run from the command line - """ +def parse_args(cmd_line_args=None): + """ parse command line arguments (given ones or per default sys.argv) """ + DEFAULT_LOG_LEVEL = "warning" # Default log level LOG_LEVELS = { 'debug': logging.DEBUG, @@ -3324,7 +3322,7 @@ def main(): parser.add_option('--relaxed', dest="relaxed", action="store_true", default=False, help="Do not raise errors if opening of substream fails") - (options, args) = parser.parse_args() + (options, args) = parser.parse_args(cmd_line_args) # Print help if no 
arguments are passed if len(args) == 0: @@ -3333,16 +3331,32 @@ def main(): parser.print_help() sys.exit(RETURN_WRONG_ARGS) + options.loglevel = LOG_LEVELS[options.loglevel] + + return options, args + + +def main(cmd_line_args=None): + """ + Main function, called when olevba is run from the command line + + Optional argument: command line arguments to be forwarded to the option parser + in parse_args. Per default (cmd_line_args=None), sys.argv is used. Option + mainly added for unit-testing + """ + + options, args = parse_args(cmd_line_args) + # provide info about tool and its version if options.output_mode == 'json': - # prints opening [ + # print first json entry with meta info and opening '[' print_json(script_name='olevba', version=__version__, url='http://decalage.info/python/oletools', - type='MetaInformation') + type='MetaInformation', _json_is_first=True) else: print('olevba %s - http://decalage.info/python/oletools' % __version__) - logging.basicConfig(level=LOG_LEVELS[options.loglevel], format='%(levelname)-8s %(message)s') + logging.basicConfig(level=options.loglevel, format='%(levelname)-8s %(message)s') # enable logging in the modules: enable_logging() diff --git a/oletools/olevba3.py b/oletools/olevba3.py index 802b3c3..495ab31 100644 --- a/oletools/olevba3.py +++ b/oletools/olevba3.py @@ -1988,20 +1988,19 @@ def json2ascii(json_obj, encoding='utf8', errors='replace'): return json_obj -_have_printed_json_start = False - -def print_json(json_dict=None, _json_is_last=False, **json_parts): +def print_json(json_dict=None, _json_is_first=False, _json_is_last=False, + **json_parts): """ line-wise print of json.dumps(json2ascii(..)) with options and indent+1 can use in two ways: (1) print_json(some_dict) (2) print_json(key1=value1, key2=value2, ...) 
+ :param bool _json_is_first: set to True only for very first entry to complete + the top-level json-list :param bool _json_is_last: set to True only for very last entry to complete the top-level json-list """ - global _have_printed_json_start - if json_dict and json_parts: raise ValueError('Invalid json argument: want either single dict or ' 'key=value parts but got both)') @@ -2012,9 +2011,8 @@ def print_json(json_dict=None, _json_is_last=False, **json_parts): if json_parts: json_dict = json_parts - if not _have_printed_json_start: + if _json_is_first: print('[') - _have_printed_json_start = True lines = json.dumps(json2ascii(json_dict), check_circular=False, indent=4, ensure_ascii=False).splitlines() @@ -3232,10 +3230,9 @@ class VBA_Parser_CLI(VBA_Parser): #=== MAIN ===================================================================== -def main(): - """ - Main function, called when olevba is run from the command line - """ +def parse_args(cmd_line_args=None): + """ parse command line arguments (given ones or per default sys.argv) """ + DEFAULT_LOG_LEVEL = "warning" # Default log level LOG_LEVELS = { 'debug': logging.DEBUG, @@ -3287,7 +3284,7 @@ def main(): parser.add_option('--relaxed', dest="relaxed", action="store_true", default=False, help="Do not raise errors if opening of substream fails") - (options, args) = parser.parse_args() + (options, args) = parser.parse_args(cmd_line_args) # Print help if no arguments are passed if len(args) == 0: @@ -3295,16 +3292,32 @@ def main(): parser.print_help() sys.exit(RETURN_WRONG_ARGS) + options.loglevel = LOG_LEVELS[options.loglevel] + + return options, args + + +def main(cmd_line_args=None): + """ + Main function, called when olevba is run from the command line + + Optional argument: command line arguments to be forwarded to the option parser + in parse_args. Per default (cmd_line_args=None), sys.argv is used. 
Option + mainly added for unit-testing + """ + + options, args = parse_args(cmd_line_args) + # provide info about tool and its version if options.output_mode == 'json': - # prints opening [ + # print first json entry with meta info and opening '[' print_json(script_name='olevba', version=__version__, url='http://decalage.info/python/oletools', - type='MetaInformation') + type='MetaInformation', _json_is_first=True) else: print('olevba %s - http://decalage.info/python/oletools' % __version__) - logging.basicConfig(level=LOG_LEVELS[options.loglevel], format='%(levelname)-8s %(message)s') + logging.basicConfig(level=options.loglevel, format='%(levelname)-8s %(message)s') # enable logging in the modules: log.setLevel(logging.NOTSET) diff --git a/tests/howto_add_unittests.txt b/tests/howto_add_unittests.txt index 3178741..2501bcc 100644 --- a/tests/howto_add_unittests.txt +++ b/tests/howto_add_unittests.txt @@ -1,8 +1,12 @@ Howto: Add unittests -------------------- -For helping python's unittest to discover your tests, do the -following: +Note: The following are just guidelines to help inexperienced users create unit +tests. The python unittest library (see +https://docs.python.org/2/library/unittest.html) offers much more flexibility +than described here. + +For helping python's unittest to discover your tests, do the following: * create a subdirectory within oletools/tests/ - The directory name must be a valid python package name, diff --git a/tests/json/__init__.py b/tests/json/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/tests/json/__init__.py diff --git a/tests/json/test_output.py b/tests/json/test_output.py new file mode 100644 index 0000000..db0655e --- /dev/null +++ b/tests/json/test_output.py @@ -0,0 +1,95 @@ +""" Test validity of json output + +Some scripts have a json output flag. 
Verify that at default log levels output +can be captured as-is and parsed by a json parser -- at least if the scripts +return 0 +""" + +import unittest +import sys +import json +import os +from os.path import join +from oletools import msodde +from tests.test_utils import OutputCapture, DATA_BASE_DIR + +if sys.version_info[0] <= 2: + from oletools import olevba +else: + from oletools import olevba3 as olevba + + +class TestValidJson(unittest.TestCase): + """ Ensure that script output is valid json (if return code is 0) """ + + def iter_test_files(self): + """ Iterate over all test files in DATA_BASE_DIR """ + for dirpath, _, filenames in os.walk(DATA_BASE_DIR): + for filename in filenames: + yield join(dirpath, filename) + + def run_and_parse(self, program, args, print_output=False): + """ run single program with single file and parse output """ + return_code = None + with OutputCapture() as capturer: # capture stdout + try: + return_code = program(args) + except Exception: + return_code = 1 # would result in non-zero exit + except SystemExit as se: + return_code = se.code or 0 # se.code can be None + if return_code != 0: + if print_output: + print('Command failed ({0}) -- not parsing output' + .format(return_code)) + return [] # no need to test + + self.assertNotEqual(return_code, None, + msg='self-test fail: return_code not set') + + # now test output + if print_output: + print(capturer.buffer.getvalue()) + capturer.buffer.seek(0) # re-set position in file-like stream + try: + json_data = json.load(capturer.buffer) + except ValueError: + self.fail('Invalid json:\n' + capturer.buffer.getvalue()) + self.assertNotEqual(len(json_data), 0, msg='Output was empty') + return json_data + + def run_all_files(self, program, args_without_filename, print_output=False): + """ run test for a single program over all test files """ + n_files = 0 + for testfile in self.iter_test_files(): # loop over all input + args = args_without_filename + [testfile, ] + 
self.run_and_parse(program, args, print_output) + n_files += 1 + self.assertNotEqual(n_files, 0, + msg='self-test fail: No test files found') + + def test_msodde(self): + """ Test msodde.py """ + self.run_all_files(msodde.main, ['-j', ]) + + def test_olevba(self): + """ Test olevba.py with default args """ + self.run_all_files(olevba.main, ['-j', ]) + + def test_olevba_analysis(self): + """ Test olevba.py with -a """ + self.run_all_files(olevba.main, ['-j', '-a', ]) + + def test_olevba_recurse(self): + """ Test olevba.py with -r """ + json_data = self.run_and_parse(olevba.main, + ['-j', '-r', join(DATA_BASE_DIR, '*')]) + self.assertNotEqual(len(json_data), 0, + msg='olevba[3] returned non-zero or no output') + self.assertNotEqual(json_data[-1]['n_processed'], 0, + msg='self-test fail: No test files found!') + + +# just in case somebody calls this file as a script +if __name__ == '__main__': + unittest.main() diff --git a/tests/msodde_doc/test_basic.py b/tests/msodde_doc/test_basic.py index 0d366b1..ec6ead8 100644 --- a/tests/msodde_doc/test_basic.py +++ b/tests/msodde_doc/test_basic.py @@ -10,32 +10,38 @@ from __future__ import print_function import unittest from oletools import msodde +from tests.test_utils import OutputCapture, DATA_BASE_DIR as BASE_DIR import shlex -from os.path import join, dirname, normpath -import sys - -# python 2/3 version conflict: -if sys.version_info.major <= 2: - from StringIO import StringIO - #from io import BytesIO as StringIO - try if print() gives UnicodeError -else: - from io import StringIO - - -# base directory for test input -BASE_DIR = normpath(join(dirname(__file__), '..', 'test-data')) +from os.path import join +from traceback import print_exc class TestReturnCode(unittest.TestCase): def test_valid_doc(self): """ check that a valid doc file leads to 0 exit status """ - print(join(BASE_DIR, 'msodde-doc/test_document.doc')) - self.do_test_validity(join(BASE_DIR, 'msodde-doc/test_document.doc')) + for filename in 
('dde-test-from-office2003', 'dde-test-from-office2016', + 'harmless-clean'): + self.do_test_validity(join(BASE_DIR, 'msodde-doc', + filename + '.doc')) def test_valid_docx(self): """ check that a valid docx file leads to 0 exit status """ - self.do_test_validity(join(BASE_DIR, 'msodde-doc/test_document.docx')) + for filename in 'dde-test', 'harmless-clean': + self.do_test_validity(join(BASE_DIR, 'msodde-doc', + filename + '.docx')) + + def test_valid_docm(self): + """ check that a valid docm file leads to 0 exit status """ + for filename in 'dde-test', 'harmless-clean': + self.do_test_validity(join(BASE_DIR, 'msodde-doc', + filename + '.docm')) + + def test_invalid_other(self): + """ check that xml do not work yet """ + for extn in '-2003.xml', '.xml': + self.do_test_validity(join(BASE_DIR, 'msodde-doc', + 'harmless-clean' + extn), True) def test_invalid_none(self): """ check that no file argument leads to non-zero exit status """ @@ -58,61 +64,49 @@ class TestReturnCode(unittest.TestCase): return_code = msodde.main(args) except Exception: have_exception = True + print_exc() except SystemExit as se: # sys.exit() was called return_code = se.code if se.code is None: return_code = 0 - self.assertEqual(expect_error, have_exception or (return_code != 0)) - - -class OutputCapture: - """ context manager that captures stdout """ + self.assertEqual(expect_error, have_exception or (return_code != 0), + msg='Args={0}, expect={1}, exc={2}, return={3}' + .format(args, expect_error, have_exception, + return_code)) - def __init__(self): - self.output = StringIO() # in py2, this actually is BytesIO - def __enter__(self): - sys.stdout = self.output - return self - - def __exit__(self, exc_type, exc_value, traceback): - sys.stdout = sys.__stdout__ # re-set to original - - if exc_type: # there has been an error - print('Got error during output capture!') - print('Print captured output and re-raise:') - for line in self.output.getvalue().splitlines(): - print(line.rstrip()) # print 
output before re-raising - - def __iter__(self): - for line in self.output.getvalue().splitlines(): - yield line.rstrip() # remove newline at end of line +class TestDdeInDoc(unittest.TestCase): + def get_dde_from_output(self, capturer): + """ helper to read dde links from captured output """ + have_start_line = False + result = [] + for line in capturer: + if not line.strip(): + continue # skip empty lines + if have_start_line: + result.append(line) + elif line == 'DDE Links:': + have_start_line = True -class TestDdeInDoc(unittest.TestCase): + self.assertTrue(have_start_line) # ensure output was complete + return result def test_with_dde(self): """ check that dde links appear on stdout """ with OutputCapture() as capturer: - msodde.main([join(BASE_DIR, 'msodde-doc', 'dde-test.doc')]) - - for line in capturer: - print(line) - pass # we just want to get the last line - - self.assertNotEqual(len(line.strip()), 0) + msodde.main([join(BASE_DIR, 'msodde-doc', + 'dde-test-from-office2003.doc')]) + self.assertNotEqual(len(self.get_dde_from_output(capturer)), 0, + msg='Found no dde links in output for doc file') def test_no_dde(self): """ check that no dde links appear on stdout """ with OutputCapture() as capturer: - msodde.main([join(BASE_DIR, 'msodde-doc', 'test_document.doc')]) - - for line in capturer: - print(line) - pass # we just want to get the last line - - self.assertEqual(line.strip(), '') + msodde.main([join(BASE_DIR, 'msodde-doc', 'harmless-clean.doc')]) + self.assertEqual(len(self.get_dde_from_output(capturer)), 0, + msg='Found dde links in output for doc file') if __name__ == '__main__': diff --git a/tests/rtfobj/test_issue_185.py b/tests/rtfobj/test_issue_185.py index cf8358e..a395a67 100644 --- a/tests/rtfobj/test_issue_185.py +++ b/tests/rtfobj/test_issue_185.py @@ -1,6 +1,6 @@ import unittest, sys, os -from .. 
import testdata_reader +from tests.test_utils import testdata_reader from oletools import rtfobj class TestRtfObjIssue185(unittest.TestCase): diff --git a/tests/test-data/msodde-doc/dde-test.doc b/tests/test-data/msodde-doc/dde-test-from-office2003.doc index da5562c..da5562c 100644 --- a/tests/test-data/msodde-doc/dde-test.doc +++ b/tests/test-data/msodde-doc/dde-test-from-office2003.doc diff --git a/tests/test-data/msodde-doc/dde-test-from-office2016.doc b/tests/test-data/msodde-doc/dde-test-from-office2016.doc new file mode 100644 index 0000000..563de19 --- /dev/null +++ b/tests/test-data/msodde-doc/dde-test-from-office2016.doc diff --git a/tests/test-data/msodde-doc/dde-test.docm b/tests/test-data/msodde-doc/dde-test.docm new file mode 100644 index 0000000..ee5362a --- /dev/null +++ b/tests/test-data/msodde-doc/dde-test.docm diff --git a/tests/test-data/msodde-doc/dde-test.docx b/tests/test-data/msodde-doc/dde-test.docx new file mode 100644 index 0000000..5fba6b2 --- /dev/null +++ b/tests/test-data/msodde-doc/dde-test.docx diff --git a/tests/test-data/msodde-doc/harmless-clean-2003.xml b/tests/test-data/msodde-doc/harmless-clean-2003.xml new file mode 100644 index 0000000..477069f --- /dev/null +++ b/tests/test-data/msodde-doc/harmless-clean-2003.xml @@ -0,0 +1,3 @@ + + +useruser202017-10-26T09:10:00Z2017-10-26T09:10:00Z1392502128816TestThis is a harmless test document.It contains neither macros nor dde links nor embedded viruses nor links to evil web pages. Not even a single insult. 
Boring!Just to make things slightly interesting, however, we add some ünicöde-ßtringß and different text sizes, colors and fonts \ No newline at end of file diff --git a/tests/test-data/msodde-doc/harmless-clean.doc b/tests/test-data/msodde-doc/harmless-clean.doc new file mode 100644 index 0000000..38fcf72 --- /dev/null +++ b/tests/test-data/msodde-doc/harmless-clean.doc diff --git a/tests/test-data/msodde-doc/harmless-clean.docm b/tests/test-data/msodde-doc/harmless-clean.docm new file mode 100644 index 0000000..f234cae --- /dev/null +++ b/tests/test-data/msodde-doc/harmless-clean.docm diff --git a/tests/test-data/msodde-doc/harmless-clean.docx b/tests/test-data/msodde-doc/harmless-clean.docx new file mode 100644 index 0000000..59099f3 --- /dev/null +++ b/tests/test-data/msodde-doc/harmless-clean.docx diff --git a/tests/test-data/msodde-doc/harmless-clean.xml b/tests/test-data/msodde-doc/harmless-clean.xml new file mode 100644 index 0000000..cd1e53c --- /dev/null +++ b/tests/test-data/msodde-doc/harmless-clean.xml @@ -0,0 +1,3 @@ + + +TestThis is a harmless test document.It contains neither macros nor dde links nor embedded viruses nor links to evil web pages. Not even a single insult. 
Boring!Just to make things slightly interesting, however, we add some ünicöde-ßtringß and different text sizes, colors and fonts0139250Microsoft Office Word021falseTitel1false288falsefalse16.0000useruser22017-10-26T09:10:00Z2017-10-26T09:10:00Z \ No newline at end of file diff --git a/tests/test-data/msodde-doc/test_document.doc b/tests/test-data/msodde-doc/test_document.doc deleted file mode 100644 index 2c1768f..0000000 --- a/tests/test-data/msodde-doc/test_document.doc +++ /dev/null diff --git a/tests/test-data/msodde-doc/test_document.docx b/tests/test-data/msodde-doc/test_document.docx deleted file mode 100644 index 4dd2265..0000000 --- a/tests/test-data/msodde-doc/test_document.docx +++ /dev/null diff --git a/tests/test_utils/__init__.py b/tests/test_utils/__init__.py new file mode 100644 index 0000000..fca8642 --- /dev/null +++ b/tests/test_utils/__init__.py @@ -0,0 +1,6 @@ +from .output_capture import OutputCapture + +from os.path import dirname, join + +# Directory with test data, independent of current working directory +DATA_BASE_DIR = join(dirname(dirname(__file__)), 'test-data') diff --git a/tests/test_utils/output_capture.py b/tests/test_utils/output_capture.py new file mode 100644 index 0000000..686bc38 --- /dev/null +++ b/tests/test_utils/output_capture.py @@ -0,0 +1,50 @@ +""" class OutputCapture to test what scripts print to stdout """ + +from __future__ import print_function +import sys + + +# python 2/3 version conflict: +if sys.version_info.major <= 2: + from StringIO import StringIO +else: + from io import StringIO + +class OutputCapture: + """ context manager that captures stdout + + use as follows:: + + with OutputCapture() as capturer: + run_my_script(some_args) + + # either test line-by-line ... 
+ for line in capturer: + some_test(line) + # ...or test all output in one go + some_test(capturer.buffer.getvalue()) + + """ + + def __init__(self): + self.buffer = StringIO() + self.orig_stdout = None + + def __enter__(self): + # replace sys.stdout with own buffer. + self.orig_stdout = sys.stdout + sys.stdout = self.buffer + return self + + def __exit__(self, exc_type, exc_value, traceback): + sys.stdout = self.orig_stdout # re-set to original + + if exc_type: # there has been an error + print('Got error during output capture!') + print('Print captured output and re-raise:') + for line in self.buffer.getvalue().splitlines(): + print(line.rstrip()) # print output before re-raising + + def __iter__(self): + for line in self.buffer.getvalue().splitlines(): + yield line.rstrip() # remove newline at end of line diff --git a/tests/test_utils/testdata_reader.py b/tests/test_utils/testdata_reader.py new file mode 100644 index 0000000..4445024 --- /dev/null +++ b/tests/test_utils/testdata_reader.py @@ -0,0 +1,8 @@ +import os +from os.path import dirname, abspath, normpath, join +from . import DATA_BASE_DIR + + +def read(relative_path): + with open(join(DATA_BASE_DIR, relative_path), 'rb') as file_handle: + return file_handle.read() diff --git a/tests/testdata_reader.py b/tests/testdata_reader.py deleted file mode 100644 index e474141..0000000 --- a/tests/testdata_reader.py +++ /dev/null @@ -1,6 +0,0 @@ -import os - -def read(relative_path): - test_data = os.path.dirname(os.path.abspath(__file__)) + '/test-data/' - return open(test_data + relative_path, 'rb').read() -