diff --git a/oletools/olevba.py b/oletools/olevba.py
index 67d42ca..6edbb5e 100644
--- a/oletools/olevba.py
+++ b/oletools/olevba.py
@@ -2024,19 +2024,19 @@ def json2ascii(json_obj, encoding='utf8', errors='replace'):
return json_obj
-_have_printed_json_start = False
-
-def print_json(json_dict=None, _json_is_last=False, **json_parts):
+def print_json(json_dict=None, _json_is_first=False, _json_is_last=False,
+ **json_parts):
""" line-wise print of json.dumps(json2ascii(..)) with options and indent+1
can use in two ways:
(1) print_json(some_dict)
(2) print_json(key1=value1, key2=value2, ...)
+ :param bool _json_is_first: set to True only for very first entry to open
+ the top-level json-list
:param bool _json_is_last: set to True only for very last entry to complete
the top-level json-list
"""
- global _have_printed_json_start
if json_dict and json_parts:
raise ValueError('Invalid json argument: want either single dict or '
@@ -2048,9 +2048,8 @@ def print_json(json_dict=None, _json_is_last=False, **json_parts):
if json_parts:
json_dict = json_parts
- if not _have_printed_json_start:
+ if _json_is_first:
print('[')
- _have_printed_json_start = True
lines = json.dumps(json2ascii(json_dict), check_circular=False,
indent=4, ensure_ascii=False).splitlines()
@@ -3269,10 +3268,9 @@ class VBA_Parser_CLI(VBA_Parser):
#=== MAIN =====================================================================
-def main():
- """
- Main function, called when olevba is run from the command line
- """
+def parse_args(cmd_line_args=None):
+ """ parse command line arguments (given ones or per default sys.argv) """
+
DEFAULT_LOG_LEVEL = "warning" # Default log level
LOG_LEVELS = {
'debug': logging.DEBUG,
@@ -3324,7 +3322,7 @@ def main():
parser.add_option('--relaxed', dest="relaxed", action="store_true", default=False,
help="Do not raise errors if opening of substream fails")
- (options, args) = parser.parse_args()
+ (options, args) = parser.parse_args(cmd_line_args)
# Print help if no arguments are passed
if len(args) == 0:
@@ -3333,16 +3331,32 @@ def main():
parser.print_help()
sys.exit(RETURN_WRONG_ARGS)
+ options.loglevel = LOG_LEVELS[options.loglevel]
+
+ return options, args
+
+
+def main(cmd_line_args=None):
+ """
+ Main function, called when olevba is run from the command line
+
+ Optional argument: command line arguments to be forwarded to the option parser
+ in parse_args. Per default (cmd_line_args=None), sys.argv is used. Option
+ mainly added for unit-testing
+ """
+
+ options, args = parse_args(cmd_line_args)
+
# provide info about tool and its version
if options.output_mode == 'json':
- # prints opening [
+ # print first json entry with meta info and opening '['
print_json(script_name='olevba', version=__version__,
url='http://decalage.info/python/oletools',
- type='MetaInformation')
+ type='MetaInformation', _json_is_first=True)
else:
print('olevba %s - http://decalage.info/python/oletools' % __version__)
- logging.basicConfig(level=LOG_LEVELS[options.loglevel], format='%(levelname)-8s %(message)s')
+ logging.basicConfig(level=options.loglevel, format='%(levelname)-8s %(message)s')
# enable logging in the modules:
enable_logging()
diff --git a/oletools/olevba3.py b/oletools/olevba3.py
index 802b3c3..495ab31 100644
--- a/oletools/olevba3.py
+++ b/oletools/olevba3.py
@@ -1988,20 +1988,19 @@ def json2ascii(json_obj, encoding='utf8', errors='replace'):
return json_obj
-_have_printed_json_start = False
-
-def print_json(json_dict=None, _json_is_last=False, **json_parts):
+def print_json(json_dict=None, _json_is_first=False, _json_is_last=False,
+ **json_parts):
""" line-wise print of json.dumps(json2ascii(..)) with options and indent+1
can use in two ways:
(1) print_json(some_dict)
(2) print_json(key1=value1, key2=value2, ...)
+ :param bool _json_is_first: set to True only for very first entry to open
+ the top-level json-list
:param bool _json_is_last: set to True only for very last entry to complete
the top-level json-list
"""
- global _have_printed_json_start
-
if json_dict and json_parts:
raise ValueError('Invalid json argument: want either single dict or '
'key=value parts but got both)')
@@ -2012,9 +2011,8 @@ def print_json(json_dict=None, _json_is_last=False, **json_parts):
if json_parts:
json_dict = json_parts
- if not _have_printed_json_start:
+ if _json_is_first:
print('[')
- _have_printed_json_start = True
lines = json.dumps(json2ascii(json_dict), check_circular=False,
indent=4, ensure_ascii=False).splitlines()
@@ -3232,10 +3230,9 @@ class VBA_Parser_CLI(VBA_Parser):
#=== MAIN =====================================================================
-def main():
- """
- Main function, called when olevba is run from the command line
- """
+def parse_args(cmd_line_args=None):
+ """ parse command line arguments (given ones or per default sys.argv) """
+
DEFAULT_LOG_LEVEL = "warning" # Default log level
LOG_LEVELS = {
'debug': logging.DEBUG,
@@ -3287,7 +3284,7 @@ def main():
parser.add_option('--relaxed', dest="relaxed", action="store_true", default=False,
help="Do not raise errors if opening of substream fails")
- (options, args) = parser.parse_args()
+ (options, args) = parser.parse_args(cmd_line_args)
# Print help if no arguments are passed
if len(args) == 0:
@@ -3295,16 +3292,32 @@ def main():
parser.print_help()
sys.exit(RETURN_WRONG_ARGS)
+ options.loglevel = LOG_LEVELS[options.loglevel]
+
+ return options, args
+
+
+def main(cmd_line_args=None):
+ """
+ Main function, called when olevba is run from the command line
+
+ Optional argument: command line arguments to be forwarded to the option parser
+ in parse_args. Per default (cmd_line_args=None), sys.argv is used. Option
+ mainly added for unit-testing
+ """
+
+ options, args = parse_args(cmd_line_args)
+
# provide info about tool and its version
if options.output_mode == 'json':
- # prints opening [
+ # print first json entry with meta info and opening '['
print_json(script_name='olevba', version=__version__,
url='http://decalage.info/python/oletools',
- type='MetaInformation')
+ type='MetaInformation', _json_is_first=True)
else:
print('olevba %s - http://decalage.info/python/oletools' % __version__)
- logging.basicConfig(level=LOG_LEVELS[options.loglevel], format='%(levelname)-8s %(message)s')
+ logging.basicConfig(level=options.loglevel, format='%(levelname)-8s %(message)s')
# enable logging in the modules:
log.setLevel(logging.NOTSET)
diff --git a/tests/howto_add_unittests.txt b/tests/howto_add_unittests.txt
index 3178741..2501bcc 100644
--- a/tests/howto_add_unittests.txt
+++ b/tests/howto_add_unittests.txt
@@ -1,8 +1,12 @@
Howto: Add unittests
--------------------
-For helping python's unittest to discover your tests, do the
-following:
+Note: The following are just guidelines to help inexperienced users create unit
+tests. The python unittest library (see
+https://docs.python.org/2/library/unittest.html) offers much more flexibility
+than described here.
+
+For helping python's unittest to discover your tests, do the following:
* create a subdirectory within oletools/tests/
- The directory name must be a valid python package name,
diff --git a/tests/json/__init__.py b/tests/json/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/tests/json/__init__.py
diff --git a/tests/json/test_output.py b/tests/json/test_output.py
new file mode 100644
index 0000000..db0655e
--- /dev/null
+++ b/tests/json/test_output.py
@@ -0,0 +1,95 @@
+""" Test validity of json output
+
+Some scripts have a json output flag. Verify that at default log levels output
+can be captured as-is and parsed by a json parser -- at least if the scripts
+return 0
+"""
+
+import unittest
+import sys
+import json
+import os
+from os.path import join
+from oletools import msodde
+from tests.test_utils import OutputCapture, DATA_BASE_DIR
+
+if sys.version_info[0] <= 2:
+ from oletools import olevba
+else:
+ from oletools import olevba3 as olevba
+
+
+class TestValidJson(unittest.TestCase):
+    """ Ensure that script output is valid json (if return code is 0) """
+
+    def iter_test_files(self):
+        """ Iterate over all test files in DATA_BASE_DIR """
+        for dirpath, _, filenames in os.walk(DATA_BASE_DIR):
+            for filename in filenames:
+                yield join(dirpath, filename)
+
+    def run_and_parse(self, program, args, print_output=False):
+        """ run program with args; return parsed json, or [] if it failed """
+        return_code = None
+        with OutputCapture() as capturer:       # capture stdout
+            try:
+                return_code = program(args)
+            except SystemExit as se:
+                return_code = se.code or 0   # se.code can be None
+            except Exception:
+                return_code = 1   # would result in non-zero exit
+
+        # check this first: None != 0 would otherwise hide a missing result
+        self.assertNotEqual(return_code, None,
+                            msg='self-test fail: return_code not set')
+        if return_code != 0:   # compare by value; "is not 0" tests identity
+            if print_output:
+                print('Command failed ({0}) -- not parsing output'
+                      .format(return_code))
+            return []    # no need to test
+
+        output = capturer.buffer.getvalue()
+        if print_output:
+            print(output)
+        try:
+            json_data = json.loads(output)
+        except ValueError:
+            self.fail('Invalid json:\n' + output)
+        self.assertNotEqual(len(json_data), 0, msg='Output was empty')
+        return json_data
+
+    def run_all_files(self, program, args_without_filename, print_output=False):
+        """ run test for a single program over all test files """
+        n_files = 0
+        for testfile in self.iter_test_files():    # loop over all input
+            args = args_without_filename + [testfile, ]
+            self.run_and_parse(program, args, print_output)
+            n_files += 1
+        self.assertNotEqual(n_files, 0,
+                            msg='self-test fail: No test files found')
+
+    def test_msodde(self):
+        """ Test msodde.py """
+        self.run_all_files(msodde.main, ['-j', ])
+
+    def test_olevba(self):
+        """ Test olevba.py with default args """
+        self.run_all_files(olevba.main, ['-j', ])
+
+    def test_olevba_analysis(self):
+        """ Test olevba.py with -a """
+        self.run_all_files(olevba.main, ['-j', '-a', ])
+
+    def test_olevba_recurse(self):
+        """ Test olevba.py with -r """
+        json_data = self.run_and_parse(olevba.main,
+                                       ['-j', '-r', join(DATA_BASE_DIR, '*')])
+        self.assertNotEqual(len(json_data), 0,
+                            msg='olevba[3] returned non-zero or no output')
+        self.assertNotEqual(json_data[-1]['n_processed'], 0,
+                            msg='self-test fail: No test files found!')
+
+
+# just in case somebody calls this file as a script
+if __name__ == '__main__':
+ unittest.main()
diff --git a/tests/msodde_doc/test_basic.py b/tests/msodde_doc/test_basic.py
index 0d366b1..ec6ead8 100644
--- a/tests/msodde_doc/test_basic.py
+++ b/tests/msodde_doc/test_basic.py
@@ -10,32 +10,38 @@ from __future__ import print_function
import unittest
from oletools import msodde
+from tests.test_utils import OutputCapture, DATA_BASE_DIR as BASE_DIR
import shlex
-from os.path import join, dirname, normpath
-import sys
-
-# python 2/3 version conflict:
-if sys.version_info.major <= 2:
- from StringIO import StringIO
- #from io import BytesIO as StringIO - try if print() gives UnicodeError
-else:
- from io import StringIO
-
-
-# base directory for test input
-BASE_DIR = normpath(join(dirname(__file__), '..', 'test-data'))
+from os.path import join
+from traceback import print_exc
class TestReturnCode(unittest.TestCase):
def test_valid_doc(self):
""" check that a valid doc file leads to 0 exit status """
- print(join(BASE_DIR, 'msodde-doc/test_document.doc'))
- self.do_test_validity(join(BASE_DIR, 'msodde-doc/test_document.doc'))
+ for filename in ('dde-test-from-office2003', 'dde-test-from-office2016',
+ 'harmless-clean'):
+ self.do_test_validity(join(BASE_DIR, 'msodde-doc',
+ filename + '.doc'))
def test_valid_docx(self):
""" check that a valid docx file leads to 0 exit status """
- self.do_test_validity(join(BASE_DIR, 'msodde-doc/test_document.docx'))
+ for filename in 'dde-test', 'harmless-clean':
+ self.do_test_validity(join(BASE_DIR, 'msodde-doc',
+ filename + '.docx'))
+
+ def test_valid_docm(self):
+ """ check that a valid docm file leads to 0 exit status """
+ for filename in 'dde-test', 'harmless-clean':
+ self.do_test_validity(join(BASE_DIR, 'msodde-doc',
+ filename + '.docm'))
+
+ def test_invalid_other(self):
+ """ check that xml files do not work yet """
+ for extn in '-2003.xml', '.xml':
+ self.do_test_validity(join(BASE_DIR, 'msodde-doc',
+ 'harmless-clean' + extn), True)
def test_invalid_none(self):
""" check that no file argument leads to non-zero exit status """
@@ -58,61 +64,49 @@ class TestReturnCode(unittest.TestCase):
return_code = msodde.main(args)
except Exception:
have_exception = True
+ print_exc()
except SystemExit as se: # sys.exit() was called
return_code = se.code
if se.code is None:
return_code = 0
- self.assertEqual(expect_error, have_exception or (return_code != 0))
-
-
-class OutputCapture:
- """ context manager that captures stdout """
+ self.assertEqual(expect_error, have_exception or (return_code != 0),
+ msg='Args={0}, expect={1}, exc={2}, return={3}'
+ .format(args, expect_error, have_exception,
+ return_code))
- def __init__(self):
- self.output = StringIO() # in py2, this actually is BytesIO
- def __enter__(self):
- sys.stdout = self.output
- return self
-
- def __exit__(self, exc_type, exc_value, traceback):
- sys.stdout = sys.__stdout__ # re-set to original
-
- if exc_type: # there has been an error
- print('Got error during output capture!')
- print('Print captured output and re-raise:')
- for line in self.output.getvalue().splitlines():
- print(line.rstrip()) # print output before re-raising
-
- def __iter__(self):
- for line in self.output.getvalue().splitlines():
- yield line.rstrip() # remove newline at end of line
+class TestDdeInDoc(unittest.TestCase):
+ def get_dde_from_output(self, capturer):
+ """ helper to read dde links from captured output """
+ have_start_line = False
+ result = []
+ for line in capturer:
+ if not line.strip():
+ continue # skip empty lines
+ if have_start_line:
+ result.append(line)
+ elif line == 'DDE Links:':
+ have_start_line = True
-class TestDdeInDoc(unittest.TestCase):
+ self.assertTrue(have_start_line) # ensure output was complete
+ return result
def test_with_dde(self):
""" check that dde links appear on stdout """
with OutputCapture() as capturer:
- msodde.main([join(BASE_DIR, 'msodde-doc', 'dde-test.doc')])
-
- for line in capturer:
- print(line)
- pass # we just want to get the last line
-
- self.assertNotEqual(len(line.strip()), 0)
+ msodde.main([join(BASE_DIR, 'msodde-doc',
+ 'dde-test-from-office2003.doc')])
+ self.assertNotEqual(len(self.get_dde_from_output(capturer)), 0,
+ msg='Found no dde links in output for doc file')
def test_no_dde(self):
""" check that no dde links appear on stdout """
with OutputCapture() as capturer:
- msodde.main([join(BASE_DIR, 'msodde-doc', 'test_document.doc')])
-
- for line in capturer:
- print(line)
- pass # we just want to get the last line
-
- self.assertEqual(line.strip(), '')
+ msodde.main([join(BASE_DIR, 'msodde-doc', 'harmless-clean.doc')])
+ self.assertEqual(len(self.get_dde_from_output(capturer)), 0,
+ msg='Found dde links in output for doc file')
if __name__ == '__main__':
diff --git a/tests/rtfobj/test_issue_185.py b/tests/rtfobj/test_issue_185.py
index cf8358e..a395a67 100644
--- a/tests/rtfobj/test_issue_185.py
+++ b/tests/rtfobj/test_issue_185.py
@@ -1,6 +1,6 @@
import unittest, sys, os
-from .. import testdata_reader
+from tests.test_utils import testdata_reader
from oletools import rtfobj
class TestRtfObjIssue185(unittest.TestCase):
diff --git a/tests/test-data/msodde-doc/dde-test.doc b/tests/test-data/msodde-doc/dde-test-from-office2003.doc
index da5562c..da5562c 100644
--- a/tests/test-data/msodde-doc/dde-test.doc
+++ b/tests/test-data/msodde-doc/dde-test-from-office2003.doc
diff --git a/tests/test-data/msodde-doc/dde-test-from-office2016.doc b/tests/test-data/msodde-doc/dde-test-from-office2016.doc
new file mode 100644
index 0000000..563de19
--- /dev/null
+++ b/tests/test-data/msodde-doc/dde-test-from-office2016.doc
diff --git a/tests/test-data/msodde-doc/dde-test.docm b/tests/test-data/msodde-doc/dde-test.docm
new file mode 100644
index 0000000..ee5362a
--- /dev/null
+++ b/tests/test-data/msodde-doc/dde-test.docm
diff --git a/tests/test-data/msodde-doc/dde-test.docx b/tests/test-data/msodde-doc/dde-test.docx
new file mode 100644
index 0000000..5fba6b2
--- /dev/null
+++ b/tests/test-data/msodde-doc/dde-test.docx
diff --git a/tests/test-data/msodde-doc/harmless-clean-2003.xml b/tests/test-data/msodde-doc/harmless-clean-2003.xml
new file mode 100644
index 0000000..477069f
--- /dev/null
+++ b/tests/test-data/msodde-doc/harmless-clean-2003.xml
@@ -0,0 +1,3 @@
+
+
+useruser202017-10-26T09:10:00Z2017-10-26T09:10:00Z1392502128816TestThis is a harmless test document.It contains neither macros nor dde links nor embedded viruses nor links to evil web pages. Not even a single insult. Boring!Just to make things slightly interesting, however, we add some ünicöde-ßtringß and different text sizes, colors and fonts
\ No newline at end of file
diff --git a/tests/test-data/msodde-doc/harmless-clean.doc b/tests/test-data/msodde-doc/harmless-clean.doc
new file mode 100644
index 0000000..38fcf72
--- /dev/null
+++ b/tests/test-data/msodde-doc/harmless-clean.doc
diff --git a/tests/test-data/msodde-doc/harmless-clean.docm b/tests/test-data/msodde-doc/harmless-clean.docm
new file mode 100644
index 0000000..f234cae
--- /dev/null
+++ b/tests/test-data/msodde-doc/harmless-clean.docm
diff --git a/tests/test-data/msodde-doc/harmless-clean.docx b/tests/test-data/msodde-doc/harmless-clean.docx
new file mode 100644
index 0000000..59099f3
--- /dev/null
+++ b/tests/test-data/msodde-doc/harmless-clean.docx
diff --git a/tests/test-data/msodde-doc/harmless-clean.xml b/tests/test-data/msodde-doc/harmless-clean.xml
new file mode 100644
index 0000000..cd1e53c
--- /dev/null
+++ b/tests/test-data/msodde-doc/harmless-clean.xml
@@ -0,0 +1,3 @@
+
+
+TestThis is a harmless test document.It contains neither macros nor dde links nor embedded viruses nor links to evil web pages. Not even a single insult. Boring!Just to make things slightly interesting, however, we add some ünicöde-ßtringß and different text sizes, colors and fontsNormal0139250Microsoft Office Word021falseTitel1false288falsefalse16.0000useruser22017-10-26T09:10:00Z2017-10-26T09:10:00Z
\ No newline at end of file
diff --git a/tests/test-data/msodde-doc/test_document.doc b/tests/test-data/msodde-doc/test_document.doc
deleted file mode 100644
index 2c1768f..0000000
--- a/tests/test-data/msodde-doc/test_document.doc
+++ /dev/null
diff --git a/tests/test-data/msodde-doc/test_document.docx b/tests/test-data/msodde-doc/test_document.docx
deleted file mode 100644
index 4dd2265..0000000
--- a/tests/test-data/msodde-doc/test_document.docx
+++ /dev/null
diff --git a/tests/test_utils/__init__.py b/tests/test_utils/__init__.py
new file mode 100644
index 0000000..fca8642
--- /dev/null
+++ b/tests/test_utils/__init__.py
@@ -0,0 +1,6 @@
+from .output_capture import OutputCapture
+
+from os.path import abspath, dirname, join
+
+# Directory with test data; made absolute so that it stays valid if cwd changes
+DATA_BASE_DIR = join(dirname(dirname(abspath(__file__))), 'test-data')
diff --git a/tests/test_utils/output_capture.py b/tests/test_utils/output_capture.py
new file mode 100644
index 0000000..686bc38
--- /dev/null
+++ b/tests/test_utils/output_capture.py
@@ -0,0 +1,50 @@
+""" class OutputCapture to test what scripts print to stdout """
+
+from __future__ import print_function
+import sys
+
+
+# python 2/3 version conflict:
+if sys.version_info.major <= 2:
+ from StringIO import StringIO
+else:
+ from io import StringIO
+
+class OutputCapture:
+    """ Context manager that redirects sys.stdout into an in-memory buffer
+
+    Typical usage::
+
+        with OutputCapture() as capturer:
+            run_my_script(some_args)
+
+        # inspect what was printed line-by-line (right-stripped) ...
+        for line in capturer:
+            some_test(line)
+        # ... or fetch everything at once
+        some_test(capturer.buffer.getvalue())
+    """
+
+    def __init__(self):
+        self.orig_stdout = None     # set in __enter__, restored in __exit__
+        self.buffer = StringIO()    # receives everything printed meanwhile
+
+    def __enter__(self):
+        # swap in our buffer, remembering the real stdout for __exit__
+        self.orig_stdout, sys.stdout = sys.stdout, self.buffer
+        return self
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        sys.stdout = self.orig_stdout
+        if not exc_type:
+            return
+        # error inside with-block: dump captured text, then let it propagate
+        print('Got error during output capture!')
+        print('Print captured output and re-raise:')
+        for stripped in self:
+            print(stripped)
+
+    def __iter__(self):
+        """ yield each captured line without trailing whitespace """
+        for raw_line in self.buffer.getvalue().splitlines():
+            yield raw_line.rstrip()
diff --git a/tests/test_utils/testdata_reader.py b/tests/test_utils/testdata_reader.py
new file mode 100644
index 0000000..4445024
--- /dev/null
+++ b/tests/test_utils/testdata_reader.py
@@ -0,0 +1,15 @@
+import os
+from os.path import dirname, abspath, normpath, join
+from . import DATA_BASE_DIR
+
+
+def read(relative_path):
+    """ Return the raw content of a file from the test-data directory
+
+    :param str relative_path: path of the file relative to DATA_BASE_DIR
+    :returns: complete file content, read in binary mode
+    """
+    full_path = join(DATA_BASE_DIR, relative_path)
+    with open(full_path, 'rb') as data_file:
+        content = data_file.read()
+    return content
diff --git a/tests/testdata_reader.py b/tests/testdata_reader.py
deleted file mode 100644
index e474141..0000000
--- a/tests/testdata_reader.py
+++ /dev/null
@@ -1,6 +0,0 @@
-import os
-
-def read(relative_path):
- test_data = os.path.dirname(os.path.abspath(__file__)) + '/test-data/'
- return open(test_data + relative_path, 'rb').read()
-