Merge pull request #796 from christian-intra2net/unittest-automation

Run unittests automatically on github

Merge pull request #796 from christian-intra2net/unittest-automation
Run unittests automatically on github
Philippe Lagadec · GitHub
2 parents 78b2d459 3b7a4ea8
Showing 28 changed files with 414 additions and 173 deletions
.github/workflows/unittests.yml
oletools/common/io_encoding.py
oletools/common/log_helper/_json_formatter.py
oletools/common/log_helper/_logger_adapter.py
oletools/common/log_helper/log_helper.py
oletools/doc/Contribute.md
oletools/mraptor_milter.py
oletools/msodde.py
oletools/oleobj.py
oletools/olevba.py
oletools/ooxml.py
oletools/ppt_parser.py
oletools/rtfobj.py
oletools/xls_parser.py
tests/common/log_helper/log_helper_test_imported.py
tests/common/log_helper/log_helper_test_main.py
tests/common/log_helper/test_log_helper.py
tests/common/test_encoding_handler.py
tests/ftguess/test_basic.py
tests/msodde/test_basic.py
+name: Python package
+
+on:
+  push:
+    branches: [master]
+  pull_request:
+    branches: [master]
+
+jobs:
+  check:
+    runs-on: ${{ matrix.os }}
+    strategy:
+      matrix:
+        os: ["ubuntu-latest", "windows-latest", "macos-latest"]
+        python-version: ["3.x", "pypy-3.9"]
+        include:
+          - python-version: 3.x
+            runlint: 1
+
+    steps:
+      - uses: actions/checkout@v3
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v3
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Install dependencies
+        run: |
+          python -c "import sys; import platform; print(sys.version); print(sys.platform); print(platform.python_implementation()); print(platform.system())"
+          cat requirements.txt
+          python -m pip install --upgrade pip
+          pip install -r requirements.txt
+          pip install pylint
+      - name: Run pylint
+        if: ${{ matrix.runlint }}
+        run: pylint -E --ignore=thirdparty oletools tests
+      - name: Run unittests
+        run: python -m unittest discover -f
 \ No newline at end of file
@@ -58,7 +58,7 @@ PY3 = sys.version_info.major &gt;= 3
 if PY3:
     from builtins import open as builtin_open
 else:
-    from __builtin__ import open as builtin_open
+    from __builtin__ import open as builtin_open    # pylint: disable=import-error
 # -- CONSTANTS ----------------------------------------------------------------
 #: encoding to use for redirection if no good encoding can be found
@@ -5,12 +5,17 @@ import json
 class JsonFormatter(logging.Formatter):
     """
     Format every message to be logged as a JSON object
+
+    Uses the standard :py:class:`logging.Formatter` with standard arguments
+    to do the actual formatting, could save and use a user-supplied formatter
+    instead.
     """
     _is_first_line = True
     def __init__(self, other_logger_has_first_line=False):
         if other_logger_has_first_line:
             self._is_first_line = False
+        self.msg_formatter = logging.Formatter()    # could adjust this
     def format(self, record):
         """
@@ -18,12 +23,34 @@ class JsonFormatter(logging.Formatter):
         the output JSON-compatible. The only exception is when printing the first line,
         so we need to keep track of it.
-        We assume that all input comes from the OletoolsLoggerAdapter which
-        ensures that there is a `type` field in the record. Otherwise will have
-        to add a try-except around the access to `record.type`.
+        The actual conversion from :py:class:`logging.LogRecord` to a text message
+        (i.e. %-formatting, adding exception information, etc.) is delegated to the
+        standard :py:class:`logging.Formatter.
+
+        The dumped json structure contains fields `msg` with the formatted message,
+        `level` with the log-level of the message and `type`, which is created by
+        :py:class:`oletools.common.log_helper.OletoolsLoggerAdapter` or added here
+        (for input from e.g. captured warnings, third-party libraries)
         """
-        json_dict = dict(msg=record.msg.replace('\n', ' '), level=record.levelname)
-        json_dict['type'] = record.type
+        json_dict = dict(msg='', level='', type='')
+        try:
+            msg = self.msg_formatter.format(record)
+            json_dict['msg'] = msg.replace('\n', ' ')
+            json_dict['level'] = record.levelname
+            json_dict['type'] = record.type
+        except AttributeError:     # most probably: record has no "type" field
+            if record.name == 'py.warnings':   # this is from python's warning-capture logger
+                json_dict['type'] = 'warning'
+            else:
+                json_dict['type'] = 'msg'      # message of unknown origin
+        except Exception as exc:
+            try:
+                json_dict['msg'] = "Ignore {0} when formatting '{1}': {2}".format(type(exc), record.msg, exc)
+            except Exception as exc2:
+                json_dict['msg'] = 'Caught {0} in logging'.format(str(exc2))
+            json_dict['type'] = 'log-warning'
+            json_dict['level'] = 'warning'
+
         formatted_message = '    ' + json.dumps(json_dict)
         if self._is_first_line:
@@ -7,6 +7,7 @@ class OletoolsLoggerAdapter(logging.LoggerAdapter):
     Adapter class for all loggers returned by the logging module.
     """
     _json_enabled = None
+    _is_warn_logger = False   # this is always False
     def print_str(self, message, **kwargs):
         """
@@ -44,7 +45,10 @@ class OletoolsLoggerAdapter(logging.LoggerAdapter):
             kwargs['extra']['type'] = kwargs['type']
             del kwargs['type']    # downstream loggers cannot deal with this
         if 'type' not in kwargs['extra']:
-            kwargs['extra']['type'] = 'msg'   # type will be added to LogRecord
+            if self._is_warn_logger:
+                kwargs['extra']['type'] = 'warning'    # this will add field
+            else:
+                kwargs['extra']['type'] = 'msg'        # 'type' to LogRecord
         return msg, kwargs
     def set_json_enabled_function(self, json_enabled):
@@ -53,6 +57,12 @@ class OletoolsLoggerAdapter(logging.LoggerAdapter):
         """
         self._json_enabled = json_enabled
+    def set_warnings_logger(self):
+        """Make this the logger for warnings"""
+        # create a object attribute that shadows the class attribute which is
+        # always False
+        self._is_warn_logger = True
+
     def level(self):
         """Return current level of logger."""
         return self.logger.level
@@ -152,6 +152,11 @@ class LogHelper:
         self._use_json = use_json
         sys.excepthook = self._get_except_hook(sys.excepthook)
+        # make sure warnings do not mess up our output
+        logging.captureWarnings(True)
+        warn_logger = self.get_or_create_silent_logger('py.warnings')
+        warn_logger.set_warnings_logger()
+
         # since there could be loggers already created we go through all of them
         # and set their levels to 0 so they will use the root logger's level
         for name in self._all_names:
@@ -174,6 +179,7 @@ class LogHelper:
         # end logging
         self._all_names = set()
+        logging.captureWarnings(False)
         logging.shutdown()
         # end json list
@@ -13,6 +13,8 @@ to **send feedback**.
 The code is available in [a repository on GitHub](https://github.com/decalage2/oletools).
 You may use it to **submit enhancements** using forks and pull requests.
+When submitting a PR, GitHub will automatically check that unittests pass and
+`pylint -E` does not report anything for the code files you changed.
 --------------------------------------------------------------------------
@@ -69,7 +69,7 @@ __version__ = &#39;0.51&#39;
 # --- IMPORTS ----------------------------------------------------------------
-import Milter
+import Milter         # not part of requirements, therefore: # pylint: disable=import-error
 import io
 import time
 import email
@@ -78,7 +78,7 @@ import os
 import logging
 import logging.handlers
 import datetime
-import StringIO
+import StringIO      # not part of requirements, therefore: # pylint: disable=import-error
 from socket import AF_INET6
@@ -96,7 +96,7 @@ if not _parent_dir in sys.path:
 from oletools import olevba, mraptor
-from Milter.utils import parse_addr
+from Milter.utils import parse_addr  # not part of requirements, therefore: # pylint: disable=import-error
 from zipfile import is_zipfile
@@ -389,7 +389,7 @@ if __name__ == &quot;__main__&quot;:
     # Using daemonize:
     # See http://daemonize.readthedocs.io/en/latest/
-    from daemonize import Daemonize
+    from daemonize import Daemonize    # not part of requirements, therefore: # pylint: disable=import-error
     daemon = Daemonize(app="mraptor_milter", pid=PIDFILE, action=main)
     daemon.start()
@@ -149,69 +149,69 @@ LOCATIONS = (&#39;word/document.xml&#39;, &#39;word/endnotes.xml&#39;, &#39;word/footnotes.xml&#39;,
 #          switches_with_args, switches_without_args, format_switches)
 FIELD_BLACKLIST = (
     # date and time:
-    ('CREATEDATE', 0, 0, '', 'hs',  'datetime'),                 # pylint: disable=bad-whitespace
-    ('DATE',       0, 0, '', 'hls', 'datetime'),                 # pylint: disable=bad-whitespace
-    ('EDITTIME',   0, 0, '', '',    'numeric'),                  # pylint: disable=bad-whitespace
-    ('PRINTDATE',  0, 0, '', 'hs',  'datetime'),                 # pylint: disable=bad-whitespace
-    ('SAVEDATE',   0, 0, '', 'hs',  'datetime'),                 # pylint: disable=bad-whitespace
-    ('TIME',       0, 0, '', '',    'datetime'),                 # pylint: disable=bad-whitespace
+    ('CREATEDATE', 0, 0, '', 'hs',  'datetime'),
+    ('DATE',       0, 0, '', 'hls', 'datetime'),
+    ('EDITTIME',   0, 0, '', '',    'numeric'),
+    ('PRINTDATE',  0, 0, '', 'hs',  'datetime'),
+    ('SAVEDATE',   0, 0, '', 'hs',  'datetime'),
+    ('TIME',       0, 0, '', '',    'datetime'),
     # exclude document automation (we hate the "auto" in "automation")
     # (COMPARE, DOCVARIABLE, GOTOBUTTON, IF, MACROBUTTON, PRINT)
     # document information
-    ('AUTHOR',      0, 1, '', '',   'string'),                   # pylint: disable=bad-whitespace
-    ('COMMENTS',    0, 1, '', '',   'string'),                   # pylint: disable=bad-whitespace
-    ('DOCPROPERTY', 1, 0, '', '',   'string/numeric/datetime'),  # pylint: disable=bad-whitespace
-    ('FILENAME',    0, 0, '', 'p',  'string'),                   # pylint: disable=bad-whitespace
-    ('FILESIZE',    0, 0, '', 'km', 'numeric'),                  # pylint: disable=bad-whitespace
-    ('KEYWORDS',    0, 1, '', '',   'string'),                   # pylint: disable=bad-whitespace
-    ('LASTSAVEDBY', 0, 0, '', '',   'string'),                   # pylint: disable=bad-whitespace
-    ('NUMCHARS',    0, 0, '', '',   'numeric'),                  # pylint: disable=bad-whitespace
-    ('NUMPAGES',    0, 0, '', '',   'numeric'),                  # pylint: disable=bad-whitespace
-    ('NUMWORDS',    0, 0, '', '',   'numeric'),                  # pylint: disable=bad-whitespace
-    ('SUBJECT',     0, 1, '', '',   'string'),                   # pylint: disable=bad-whitespace
-    ('TEMPLATE',    0, 0, '', 'p',  'string'),                   # pylint: disable=bad-whitespace
-    ('TITLE',       0, 1, '', '',   'string'),                   # pylint: disable=bad-whitespace
+    ('AUTHOR',      0, 1, '', '',   'string'),
+    ('COMMENTS',    0, 1, '', '',   'string'),
+    ('DOCPROPERTY', 1, 0, '', '',   'string/numeric/datetime'),
+    ('FILENAME',    0, 0, '', 'p',  'string'),
+    ('FILESIZE',    0, 0, '', 'km', 'numeric'),
+    ('KEYWORDS',    0, 1, '', '',   'string'),
+    ('LASTSAVEDBY', 0, 0, '', '',   'string'),
+    ('NUMCHARS',    0, 0, '', '',   'numeric'),
+    ('NUMPAGES',    0, 0, '', '',   'numeric'),
+    ('NUMWORDS',    0, 0, '', '',   'numeric'),
+    ('SUBJECT',     0, 1, '', '',   'string'),
+    ('TEMPLATE',    0, 0, '', 'p',  'string'),
+    ('TITLE',       0, 1, '', '',   'string'),
     # equations and formulas
     # exlude '=' formulae because they have different syntax (and can be bad)
-    ('ADVANCE', 0, 0, 'dlruxy', '', ''),                         # pylint: disable=bad-whitespace
-    ('SYMBOL',  1, 0, 'fs', 'ahju', ''),                         # pylint: disable=bad-whitespace
+    ('ADVANCE', 0, 0, 'dlruxy', '', ''),
+    ('SYMBOL',  1, 0, 'fs', 'ahju', ''),
     # form fields
-    ('FORMCHECKBOX', 0, 0, '', '', ''),                          # pylint: disable=bad-whitespace
-    ('FORMDROPDOWN', 0, 0, '', '', ''),                          # pylint: disable=bad-whitespace
-    ('FORMTEXT', 0, 0, '', '', ''),                              # pylint: disable=bad-whitespace
+    ('FORMCHECKBOX', 0, 0, '', '', ''),
+    ('FORMDROPDOWN', 0, 0, '', '', ''),
+    ('FORMTEXT', 0, 0, '', '', ''),
     # index and tables
-    ('INDEX', 0, 0, 'bcdefghklpsz', 'ry', ''),                   # pylint: disable=bad-whitespace
+    ('INDEX', 0, 0, 'bcdefghklpsz', 'ry', ''),
     # exlude RD since that imports data from other files
-    ('TA',  0, 0, 'clrs', 'bi', ''),                             # pylint: disable=bad-whitespace
-    ('TC',  1, 0, 'fl', 'n', ''),                                # pylint: disable=bad-whitespace
-    ('TOA', 0, 0, 'bcdegls', 'fhp', ''),                         # pylint: disable=bad-whitespace
-    ('TOC', 0, 0, 'abcdflnopst', 'huwxz', ''),                   # pylint: disable=bad-whitespace
-    ('XE',  1, 0, 'frty', 'bi', ''),                             # pylint: disable=bad-whitespace
+    ('TA',  0, 0, 'clrs', 'bi', ''),
+    ('TC',  1, 0, 'fl', 'n', ''),
+    ('TOA', 0, 0, 'bcdegls', 'fhp', ''),
+    ('TOC', 0, 0, 'abcdflnopst', 'huwxz', ''),
+    ('XE',  1, 0, 'frty', 'bi', ''),
     # links and references
     # exclude AUTOTEXT and AUTOTEXTLIST since we do not like stuff with 'AUTO'
-    ('BIBLIOGRAPHY', 0, 0, 'lfm', '', ''),                       # pylint: disable=bad-whitespace
-    ('CITATION', 1, 0, 'lfspvm', 'nty', ''),                     # pylint: disable=bad-whitespace
+    ('BIBLIOGRAPHY', 0, 0, 'lfm', '', ''),
+    ('CITATION', 1, 0, 'lfspvm', 'nty', ''),
     # exclude HYPERLINK since we are allergic to URLs
     # exclude INCLUDEPICTURE and INCLUDETEXT (other file or maybe even URL?)
     # exclude LINK and REF (could reference other files)
-    ('NOTEREF', 1, 0, '', 'fhp', ''),                            # pylint: disable=bad-whitespace
-    ('PAGEREF', 1, 0, '', 'hp', ''),                             # pylint: disable=bad-whitespace
-    ('QUOTE', 1, 0, '', '', 'datetime'),                         # pylint: disable=bad-whitespace
-    ('STYLEREF', 1, 0, '', 'lnprtw', ''),                        # pylint: disable=bad-whitespace
+    ('NOTEREF', 1, 0, '', 'fhp', ''),
+    ('PAGEREF', 1, 0, '', 'hp', ''),
+    ('QUOTE', 1, 0, '', '', 'datetime'),
+    ('STYLEREF', 1, 0, '', 'lnprtw', ''),
     # exclude all Mail Merge commands since they import data from other files
     # (ADDRESSBLOCK, ASK, COMPARE, DATABASE, FILLIN, GREETINGLINE, IF,
     #  MERGEFIELD, MERGEREC, MERGESEQ, NEXT, NEXTIF, SET, SKIPIF)
     # Numbering
-    ('LISTNUM',      0, 1, 'ls', '', ''),                        # pylint: disable=bad-whitespace
-    ('PAGE',         0, 0, '', '', 'numeric'),                   # pylint: disable=bad-whitespace
-    ('REVNUM',       0, 0, '', '', ''),                          # pylint: disable=bad-whitespace
-    ('SECTION',      0, 0, '', '', 'numeric'),                   # pylint: disable=bad-whitespace
-    ('SECTIONPAGES', 0, 0, '', '', 'numeric'),                   # pylint: disable=bad-whitespace
-    ('SEQ',          1, 1, 'rs', 'chn', 'numeric'),              # pylint: disable=bad-whitespace
-    # user information                                           # pylint: disable=bad-whitespace
-    ('USERADDRESS', 0, 1, '', '', 'string'),                     # pylint: disable=bad-whitespace
-    ('USERINITIALS', 0, 1, '', '', 'string'),                    # pylint: disable=bad-whitespace
-    ('USERNAME', 0, 1, '', '', 'string'),                        # pylint: disable=bad-whitespace
+    ('LISTNUM',      0, 1, 'ls', '', ''),
+    ('PAGE',         0, 0, '', '', 'numeric'),
+    ('REVNUM',       0, 0, '', '', ''),
+    ('SECTION',      0, 0, '', '', 'numeric'),
+    ('SECTIONPAGES', 0, 0, '', '', 'numeric'),
+    ('SEQ',          1, 1, 'rs', 'chn', 'numeric'),
+    # user information
+    ('USERADDRESS', 0, 1, '', '', 'string'),
+    ('USERINITIALS', 0, 1, '', '', 'string'),
+    ('USERNAME', 0, 1, '', '', 'string'),
 )
 FIELD_DDE_REGEX = re.compile(r'^\s*dde(auto)?\s+', re.I)
@@ -180,7 +180,7 @@ if sys.version_info[0] &lt;= 2:
     NULL_CHAR = '\x00'
 else:
     # Python 3.x
-    NULL_CHAR = 0     # pylint: disable=redefined-variable-type
+    NULL_CHAR = 0
     xrange = range    # pylint: disable=redefined-builtin, invalid-name
 OOXML_RELATIONSHIP_TAG = '{http://schemas.openxmlformats.org/package/2006/relationships}Relationship'
@@ -3104,7 +3104,7 @@ class VBA_Parser(object):
         log.info('Check whether OLE file is PPT')
         try:
             ppt = ppt_parser.PptParser(self.ole_file, fast_fail=True)
-            for vba_data in ppt.iter_vba_data():
+            for vba_data in ppt.iter_vba_data():    # pylint: disable=no-value-for-parameter
                 self.append_subfile(None, vba_data, container='PptParser')
             log.info('File is PPT')
             self.ole_file.close()  # just in case
@@ -160,7 +160,7 @@ def debug_str(elem):
 def isstr(some_var):
     """ version-independent test for isinstance(some_var, (str, unicode)) """
     if sys.version_info.major == 2:
-        return isinstance(some_var, basestring)  # true for str and unicode
+        return isinstance(some_var, basestring)  # true for str and unicode   # pylint: disable=undefined-variable
     return isinstance(some_var, str)         # there is no unicode
@@ -1377,7 +1377,7 @@ class PptParser(object):
         # first identified in step 3 of Part 1, that is, the UserEditAtom
         # record closest to the end of the stream.
         if self.persist_object_directory is None:
-            self.parse_persist_object_directory()
+            self.parse_persist_object_directory()    # pylint: disable=no-value-for-parameter
         # Step 2: Lookup the value of the docPersistIdRef field in the persist
         # object directory constructed in step 8 of Part 1 to find the stream
@@ -1462,7 +1462,7 @@ class PptParser(object):
                                 rec_len=VBAInfoAtom.RECORD_LENGTH)
         # try parse
-        for idx in self.search_pattern(pattern):
+        for idx in self.search_pattern(pattern):    # pylint: disable=no-value-for-parameter
             # assume that in stream at idx there is a VBAInfoContainer
             stream.seek(idx)
             log.debug('extracting at idx {0}'.format(idx))
@@ -1515,7 +1515,7 @@ class PptParser(object):
             pattern = obj_type.generate_pattern()
             # try parse
-            for idx in self.search_pattern(pattern):
+            for idx in self.search_pattern(pattern):    # pylint: disable=no-value-for-parameter
                 # assume a ExternalObjectStorage in stream at idx
                 stream.seek(idx)
                 log.debug('extracting at idx {0}'.format(idx))
@@ -1589,7 +1589,7 @@ class PptParser(object):
         n_infos = 0
         n_macros = 0
-        for info in self.search_vba_info():
+        for info in self.search_vba_info(stream):
             n_infos += 1
             if info.vba_info_atom.f_has_macros > 0:
                 n_macros += 1
@@ -1597,13 +1597,13 @@ class PptParser(object):
         #       --> no vba-info, so all storages probably ActiveX or other OLE
         n_storages = 0
         n_compressed = 0
-        for storage in self.search_vba_storage():
+        for storage in self.search_vba_storage():    # pylint: disable=no-value-for-parameter
             n_storages += 1
             if storage.is_compressed:
                 n_compressed += 1
-                yield self.decompress_vba_storage(storage)
+                yield self.decompress_vba_storage(storage)    # pylint: disable=no-value-for-parameter
             else:
-                yield self.read_vba_storage_data(storage)
+                yield self.read_vba_storage_data(storage)    # pylint: disable=no-value-for-parameter
         log.info('found {0} infos ({1} with macros) and {2} storages '
                  '({3} compressed)'
@@ -337,7 +337,7 @@ if sys.version_info[0] &lt;= 2:
     BACKSLASH = '\\'
     BRACE_OPEN = '{'
     BRACE_CLOSE = '}'
-    UNICODE_TYPE = unicode
+    UNICODE_TYPE = unicode   # pylint: disable=undefined-variable
 else:
     # Python 3.x - Integers
     BACKSLASH = ord('\\')
@@ -229,46 +229,46 @@ class XlsbStream(record_base.OleRecordStream):
 # records that appear often but do not need their own XlsRecord subclass (yet)
 FREQUENT_RECORDS = dict([
-    ( 156, 'BuiltInFnGroupCount'),             # pylint: disable=bad-whitespace
-    (2147, 'BookExt'),                         # pylint: disable=bad-whitespace
-    ( 442, 'CodeName'),                        # pylint: disable=bad-whitespace
-    (  66, 'CodePage'),                        # pylint: disable=bad-whitespace
-    (4195, 'Dat'),                             # pylint: disable=bad-whitespace
-    (2154, 'DataLabExt'),                      # pylint: disable=bad-whitespace
-    (2155, 'DataLabExtContents'),              # pylint: disable=bad-whitespace
-    ( 215, 'DBCell'),                          # pylint: disable=bad-whitespace
-    ( 220, 'DbOrParmQry'),                     # pylint: disable=bad-whitespace
-    (2051, 'DBQueryExt'),                      # pylint: disable=bad-whitespace
-    (2166, 'DConn'),                           # pylint: disable=bad-whitespace
-    (  35, 'ExternName'),                      # pylint: disable=bad-whitespace
-    (  23, 'ExternSheet'),                     # pylint: disable=bad-whitespace
-    ( 255, 'ExtSST'),                          # pylint: disable=bad-whitespace
-    (2052, 'ExtString'),                       # pylint: disable=bad-whitespace
-    (2151, 'FeatHdr'),                         # pylint: disable=bad-whitespace
-    (  91, 'FileSharing'),                     # pylint: disable=bad-whitespace
-    (1054, 'Format'),                          # pylint: disable=bad-whitespace
-    (  49, 'Font'),                            # pylint: disable=bad-whitespace
-    (2199, 'GUIDTypeLib'),                     # pylint: disable=bad-whitespace
-    ( 440, 'HLink'),                           # pylint: disable=bad-whitespace
-    ( 225, 'InterfaceHdr'),                    # pylint: disable=bad-whitespace
-    ( 226, 'InterfaceEnd'),                    # pylint: disable=bad-whitespace
-    ( 523, 'Index'),                           # pylint: disable=bad-whitespace
-    (  24, 'Lbl'),                             # pylint: disable=bad-whitespace
-    ( 193, 'Mms'),                             # pylint: disable=bad-whitespace
-    (  93, 'Obj'),                             # pylint: disable=bad-whitespace
-    (4135, 'ObjectLink'),                      # pylint: disable=bad-whitespace
-    (2058, 'OleDbConn'),                       # pylint: disable=bad-whitespace
-    ( 222, 'OleObjectSize'),                   # pylint: disable=bad-whitespace
-    (2214, 'RichTextStream'),                  # pylint: disable=bad-whitespace
-    (2146, 'SheetExt'),                        # pylint: disable=bad-whitespace
-    (1212, 'ShrFmla'),                         # pylint: disable=bad-whitespace
-    (2060, 'SxViewExt'),                       # pylint: disable=bad-whitespace
-    (2136, 'SxViewLink'),                      # pylint: disable=bad-whitespace
-    (2049, 'WebPub'),                          # pylint: disable=bad-whitespace
-    ( 224, 'XF (formatting)'),                 # pylint: disable=bad-whitespace
-    (2173, 'XFExt (formatting)'),              # pylint: disable=bad-whitespace
-    ( 659, 'Style'),                           # pylint: disable=bad-whitespace
-    (2194, 'StyleExt')                         # pylint: disable=bad-whitespace
+    ( 156, 'BuiltInFnGroupCount'),
+    (2147, 'BookExt'),
+    ( 442, 'CodeName'),
+    (  66, 'CodePage'),
+    (4195, 'Dat'),
+    (2154, 'DataLabExt'),
+    (2155, 'DataLabExtContents'),
+    ( 215, 'DBCell'),
+    ( 220, 'DbOrParmQry'),
+    (2051, 'DBQueryExt'),
+    (2166, 'DConn'),
+    (  35, 'ExternName'),
+    (  23, 'ExternSheet'),
+    ( 255, 'ExtSST'),
+    (2052, 'ExtString'),
+    (2151, 'FeatHdr'),
+    (  91, 'FileSharing'),
+    (1054, 'Format'),
+    (  49, 'Font'),
+    (2199, 'GUIDTypeLib'),
+    ( 440, 'HLink'),
+    ( 225, 'InterfaceHdr'),
+    ( 226, 'InterfaceEnd'),
+    ( 523, 'Index'),
+    (  24, 'Lbl'),
+    ( 193, 'Mms'),
+    (  93, 'Obj'),
+    (4135, 'ObjectLink'),
+    (2058, 'OleDbConn'),
+    ( 222, 'OleObjectSize'),
+    (2214, 'RichTextStream'),
+    (2146, 'SheetExt'),
+    (1212, 'ShrFmla'),
+    (2060, 'SxViewExt'),
+    (2136, 'SxViewLink'),
+    (2049, 'WebPub'),
+    ( 224, 'XF (formatting)'),
+    (2173, 'XFExt (formatting)'),
+    ( 659, 'Style'),
+    (2194, 'StyleExt')
 ])
 #: records found in xlsb binary parts
@@ -4,6 +4,7 @@ by the main test file
 """
 from oletools.common.log_helper import log_helper
+import warnings
 DEBUG_MESSAGE = 'imported: debug log'
 INFO_MESSAGE = 'imported: info log'
@@ -11,7 +12,10 @@ WARNING_MESSAGE = &#39;imported: warning log&#39;
 ERROR_MESSAGE = 'imported: error log'
 CRITICAL_MESSAGE = 'imported: critical log'
 RESULT_MESSAGE = 'imported: result log'
+
 RESULT_TYPE = 'imported: result'
+ACTUAL_WARNING = 'Feature XYZ provided by this module might be deprecated at '\
+    'some point in the future ... or not'
 logger = log_helper.get_or_create_silent_logger('test_imported')
@@ -27,3 +31,7 @@ def log():
     logger.error(ERROR_MESSAGE)
     logger.critical(CRITICAL_MESSAGE)
     logger.info(RESULT_MESSAGE, type=RESULT_TYPE)
+
+
+def warn():
+    warnings.warn(ACTUAL_WARNING)
@@ -2,6 +2,7 @@
 import sys
 import logging
+import warnings
 from tests.common.log_helper import log_helper_test_imported
 from oletools.common.log_helper import log_helper
@@ -11,7 +12,9 @@ WARNING_MESSAGE = &#39;main: warning log&#39;
 ERROR_MESSAGE = 'main: error log'
 CRITICAL_MESSAGE = 'main: critical log'
 RESULT_MESSAGE = 'main: result log'
+
 RESULT_TYPE = 'main: result'
+ACTUAL_WARNING = 'Warnings can pop up anywhere, have to be prepared!'
 logger = log_helper.get_or_create_silent_logger('test_main')
@@ -24,7 +27,8 @@ def enable_logging():
 def main(args):
     """
-    Try to cover possible logging scenarios. For each scenario covered, here's the expected args and outcome:
+    Try to cover possible logging scenarios. For each scenario covered, here's
+    the expected args and outcome:
     - Log without enabling: ['<level>']
         * logging when being imported - should never print
     - Log as JSON without enabling: ['as-json', '<level>']
@@ -35,6 +39,8 @@ def main(args):
         * logging as JSON when being run as script - should log messages as JSON
     - Enable, log as JSON and throw: ['enable', 'as-json', 'throw', '<level>']
         * should produce JSON-compatible output, even after an unhandled exception
+    - Enable, log as JSON and warn: ['enable', 'as-json', 'warn', '<level>']
+        * should produce JSON-compatible output, even after a warning
     """
     # the level should always be the last argument passed
@@ -42,6 +48,9 @@ def main(args):
     use_json = 'as-json' in args
     throw = 'throw' in args
     percent_autoformat = '%-autoformat' in args
+    warn = 'warn' in args
+    exc_info = 'exc-info' in args
+    wrong_log_args = 'wrong-log-args' in args
     log_helper_test_imported.logger.setLevel(logging.ERROR)
@@ -53,6 +62,22 @@ def main(args):
     if throw:
         raise Exception('An exception occurred before ending the logging')
+    if warn:
+        warnings.warn(ACTUAL_WARNING)
+        log_helper_test_imported.warn()
+
+    if exc_info:
+        try:
+            raise Exception('This is an exception')
+        except Exception:
+            logger.exception('Caught exception')  # has exc_info=True
+
+    if wrong_log_args:
+        logger.info('Opening file /dangerous/file/with-%s-in-name')
+        logger.info('The result is %f')
+        logger.info('No result', 1.23)
+        logger.info('The result is %f', 'bla')
+
     log_helper.end_logging()
@@ -11,13 +11,13 @@ import json
 import subprocess
 from tests.common.log_helper import log_helper_test_main
 from tests.common.log_helper import log_helper_test_imported
+import os
 from os.path import dirname, join, relpath, abspath
 from tests.test_utils import PROJECT_ROOT
 # test file we use as "main" module
-TEST_FILE = relpath(join(dirname(abspath(__file__)), 'log_helper_test_main.py'),
-                    PROJECT_ROOT)
+TEST_FILE = join(dirname(abspath(__file__)), 'log_helper_test_main.py')
 # test file simulating a third party main module that only imports oletools
 TEST_FILE_3RD_PARTY = relpath(join(dirname(abspath(__file__)),
@@ -26,6 +26,8 @@ TEST_FILE_3RD_PARTY = relpath(join(dirname(abspath(__file__)),
 PYTHON_EXECUTABLE = sys.executable
+PERCENT_FORMAT_OUTPUT = 'The answer is 47.'
+
 class TestLogHelper(unittest.TestCase):
     def test_it_doesnt_log_when_not_enabled(self):
@@ -113,7 +115,7 @@ class TestLogHelper(unittest.TestCase):
     def test_percent_autoformat(self):
         """Test that auto-formatting of log strings with `%` works."""
         output = self._run_test(['enable', '%-autoformat', 'info'])
-        self.assertIn('The answer is 47.', output)
+        self.assertIn(PERCENT_FORMAT_OUTPUT, output)
     def test_json_correct_on_exceptions(self):
         """
@@ -141,6 +143,93 @@ class TestLogHelper(unittest.TestCase):
         self.assertIn('INFO:test_main:main: info log', output)
         self.assertIn('INFO:test_imported:imported: info log', output)
+    def test_json_correct_on_warnings(self):
+        """
+        Test that even on warnings our JSON is always correct
+        """
+        output = self._run_test(['enable', 'as-json', 'warn', 'warning'])
+        expected_messages = [
+            log_helper_test_main.WARNING_MESSAGE,
+            log_helper_test_main.ERROR_MESSAGE,
+            log_helper_test_main.CRITICAL_MESSAGE,
+            log_helper_test_imported.WARNING_MESSAGE,
+            log_helper_test_imported.ERROR_MESSAGE,
+            log_helper_test_imported.CRITICAL_MESSAGE,
+        ]
+
+        for msg in expected_messages:
+            self.assertIn(msg, output)
+
+        # last two entries of output should be warnings
+        jout = json.loads(output)
+        self.assertEqual(jout[-2]['level'], 'WARNING')
+        self.assertEqual(jout[-1]['level'], 'WARNING')
+        self.assertEqual(jout[-2]['type'], 'warning')
+        self.assertEqual(jout[-1]['type'], 'warning')
+        self.assertIn(log_helper_test_main.ACTUAL_WARNING, jout[-2]['msg'])
+        self.assertIn(log_helper_test_imported.ACTUAL_WARNING, jout[-1]['msg'])
+
+    def test_warnings(self):
+        """Check that warnings are captured and printed correctly"""
+        output = self._run_test(['enable', 'warn', 'warning'])
+
+        # find out which line contains the call to warnings.warn:
+        warnings_line = None
+        with open(TEST_FILE, 'rt') as reader:
+            for line_idx, line in enumerate(reader):
+                if 'warnings.warn' in line:
+                    warnings_line = line_idx + 1
+                    break
+        self.assertNotEqual(warnings_line, None)
+
+        imported_file = join(dirname(abspath(__file__)),
+                             'log_helper_test_imported.py')
+        imported_line = None
+        with open(imported_file, 'rt') as reader:
+            for line_idx, line in enumerate(reader):
+                if 'warnings.warn' in line:
+                    imported_line = line_idx + 1
+                    break
+        self.assertNotEqual(imported_line, None)
+
+        expect = '\n'.join([
+            'WARNING  ' + log_helper_test_main.WARNING_MESSAGE,
+            'ERROR    ' + log_helper_test_main.ERROR_MESSAGE,
+            'CRITICAL ' + log_helper_test_main.CRITICAL_MESSAGE,
+            'WARNING  ' + log_helper_test_imported.WARNING_MESSAGE,
+            'ERROR    ' + log_helper_test_imported.ERROR_MESSAGE,
+            'CRITICAL ' + log_helper_test_imported.CRITICAL_MESSAGE,
+            'WARNING  {0}:{1}: UserWarning: {2}'
+                .format(TEST_FILE, warnings_line, log_helper_test_main.ACTUAL_WARNING),
+            '  warnings.warn(ACTUAL_WARNING)',   # warnings include source line
+            '',
+            'WARNING  {0}:{1}: UserWarning: {2}'
+                .format(imported_file, imported_line, log_helper_test_imported.ACTUAL_WARNING),
+            '  warnings.warn(ACTUAL_WARNING)',   # warnings include source line
+        ])
+        self.assertEqual(output.strip(), expect)
+
+    def test_json_percent_formatting(self):
+        """Test that json-output has formatting args included in output."""
+        output = self._run_test(['enable', 'as-json', '%-autoformat', 'info'])
+        json.loads(output)    # check that this does not raise, so json is valid
+        self.assertIn(PERCENT_FORMAT_OUTPUT, output)
+
+    def test_json_exception_formatting(self):
+        """Test that json-output has formatted exception info in output"""
+        output = self._run_test(['enable', 'as-json', 'exc-info', 'info'])
+        json.loads(output)    # check that this does not raise, so json is valid
+        self.assertIn('Caught exception', output)      # actual log message
+        self.assertIn('This is an exception', output)    # message of caught exception
+        self.assertIn('Traceback (most recent call last)', output)    # start of trace
+        self.assertIn(TEST_FILE.replace('\\', '\\\\'), output)        # part of trace
+
+    def test_json_wrong_args(self):
+        """Test that too many or missing args do not raise exceptions inside logger"""
+        output = self._run_test(['enable', 'as-json', 'wrong-log-args', 'info'])
+        json.loads(output)    # check that this does not raise, so json is valid
+        # do not care about actual contents of output
+
     def _assert_json_messages(self, output, messages):
         try:
             json_data = json.loads(output)
@@ -160,8 +249,10 @@ class TestLogHelper(unittest.TestCase):
         we might get errors or false positives between sequential tests runs)
         When arg `run_third_party` is `True`, we do not run the `TEST_FILE` as
-        main moduel but the `TEST_FILE_3RD_PARTY` and return contents of
+        main module but the `TEST_FILE_3RD_PARTY` and return contents of
         `stderr` instead of `stdout`.
+
+        TODO: use tests.utils.call_and_capture
         """
         all_args = [PYTHON_EXECUTABLE, ]
         if run_third_party:
@@ -169,10 +260,12 @@ class TestLogHelper(unittest.TestCase):
         else:
             all_args.append(TEST_FILE)
         all_args.extend(args)
+        env = os.environ.copy()
+        env['PYTHONPATH'] = PROJECT_ROOT
         child = subprocess.Popen(
             all_args,
             shell=False,
-            env={'PYTHONPATH': PROJECT_ROOT},
+            env=env,
             universal_newlines=True,
             cwd=PROJECT_ROOT,
             stdin=None,
@@ -14,7 +14,13 @@ FILE_TEXT = u&#39;The unicode check mark is \u2713.\n&#39;
 @contextmanager
 def temp_file(just_name=True):
-    """Context manager that creates temp file and deletes it in the end"""
+    """
+    Context manager that creates temp file and deletes it in the end.
+
+    If `just_name` is `False` this yields (file-name, open-file-handle),
+    if `just_name` is `True` this yields just the file-name (and closes
+    the file-handle if we are on windows)
+    """
     tmp_descriptor = None
     tmp_name = None
     tmp_handle = None
@@ -24,8 +30,12 @@ def temp_file(just_name=True):
         # we create our own file handle since we want to be able to close the
         # file and open it again for reading.
         # We keep the os-level descriptor open so file name is still reserved
-        # for us
+        # for us ... except for Windows where it is not possible for another
+        # process to write to that handle
         if just_name:
+            if sys.platform.startswith('win'):
+                os.close(tmp_descriptor)
+                tmp_descriptor = None
             yield tmp_name
         else:
             tmp_handle = open(tmp_name, 'wb')
@@ -51,11 +61,7 @@ class TestEncodingHandler(unittest.TestCase):
                    shell=True)
     def test_print_redirect(self):
-        """
-        Test redirection of unicode output to files does not raise error
-
-        TODO: test this on non-linux OSs
-        """
+        """Test redirection of unicode output to files does not raise error."""
         with temp_file() as tmp_file:
             check_call('{python} {this_file} print > {tmp_file}'
                        .format(python=sys.executable, this_file=__file__,
@@ -63,7 +69,7 @@ class TestEncodingHandler(unittest.TestCase):
                        shell=True)
     @unittest.skipIf(not sys.platform.startswith('linux'),
-                     'Only tested on linux sofar')
+                     'Need to adapt this test to Windows')
     def test_print_no_lang(self):
         """
         Test redirection of unicode output to files does not raise error
@@ -89,11 +95,7 @@ class TestEncodingHandler(unittest.TestCase):
                 self.fail(cpe.output)
     def test_uopen_redirect(self):
-        """
-        Test redirection of unicode output to files does not raise error
-
-        TODO: test this on non-linux OSs
-        """
+        """Test redirection of unicode output to files does not raise error."""
         with temp_file(False) as (tmp_handle, tmp_file):
             tmp_handle.write(FILE_TEXT.encode('utf8'))
             tmp_handle.close()
@@ -109,7 +111,7 @@ class TestEncodingHandler(unittest.TestCase):
                     self.fail(cpe.output)
     @unittest.skipIf(not sys.platform.startswith('linux'),
-                     'Only tested on linux sofar')
+                     'Need to adapt this test to Windows')
     def test_uopen_no_lang(self):
         """
         Test that uopen in a C-LANG environment is ok
 """Test ftguess"""
-
 import unittest
 import os
-from os.path import splitext
+from os.path import splitext, join
 from oletools import ftguess
 # Directory with test data, independent of current working directory
@@ -47,7 +46,7 @@ class TestFTGuess(unittest.TestCase):
             before_dot, extension = splitext(filename)
             if extension == '.zip':
                 extension = splitext(before_dot)[1]
-            elif filename in ('basic/empty', 'basic/text'):
+            elif filename in (join('basic', 'empty'), join('basic', 'text')):
                 extension = '.csv'    # have just like that
             elif not extension:
                 self.fail('Could not find extension for test sample {0}'
@@ -55,7 +54,7 @@ class TestFTGuess(unittest.TestCase):
             extension = extension[1:]      # remove the leading '.'
             # encrypted files are mostly not recognized (yet?), except .xls
-            if filename.startswith('encrypted/'):
+            if filename.startswith('encrypted' + os.sep):
                 if extension == 'xls':
                     expect = ftguess.FType_Excel97
                 else:
@@ -69,7 +68,7 @@ class TestFTGuess(unittest.TestCase):
                 # not implemented yet
                 expect = ftguess.FType_Unknown
-            elif filename == 'basic/encrypted.docx':
+            elif filename == join('basic', 'encrypted.docx'):
                 expect = ftguess.FType_Generic_OLE
             elif 'excel5' in filename:
@@ -9,6 +9,7 @@ Ensure that
 from __future__ import print_function
 import unittest
+from platform import python_implementation
 import sys
 import os
 from os.path import join, basename
@@ -19,8 +20,21 @@ from tests.test_utils import call_and_capture, decrypt_sample,\
     DATA_BASE_DIR as BASE_DIR
+# Check whether we run with PyPy on windows because that causes trouble
+# when using the :py:func:`tests.test_utils.decrypt_sample`.
+#
+# :return: `(do_skip, explanation)` where `do_skip` is `True` iff running
+# PyPy on Windows; `explanation` is a simple text string
+SKIP_PYPY_WIN = (
+    python_implementation().lower().startswith('pypy')
+            and sys.platform.lower().startswith('win'),
+    "On PyPy there is a problem with deleting temp files for decrypt_sample"
+)
+
+
 class TestReturnCode(unittest.TestCase):
     """ check return codes and exception behaviour (not text output) """
+    @unittest.skipIf(*SKIP_PYPY_WIN)
     def test_valid_doc(self):
         """ check that a valid doc file leads to 0 exit status """
         for filename in (
@@ -44,6 +58,7 @@ class TestReturnCode(unittest.TestCase):
             self.do_test_validity(join(BASE_DIR, 'msodde',
                                        filename + '.docm'))
+    @unittest.skipIf(*SKIP_PYPY_WIN)
     def test_valid_xml(self):
         """ check that xml leads to 0 exit status """
         for filename in (
@@ -67,11 +82,11 @@ class TestReturnCode(unittest.TestCase):
     def test_invalid_empty(self):
         """ check that empty file argument leads to non-zero exit status """
-        self.do_test_validity(join(BASE_DIR, 'basic/empty'), Exception)
+        self.do_test_validity(join(BASE_DIR, 'basic', 'empty'), Exception)
     def test_invalid_text(self):
         """ check that text file argument leads to non-zero exit status """
-        self.do_test_validity(join(BASE_DIR, 'basic/text'), Exception)
+        self.do_test_validity(join(BASE_DIR, 'basic', 'text'), Exception)
     def test_encrypted(self):
         """
@@ -140,6 +155,7 @@ class TestDdeLinks(unittest.TestCase):
         """
         return [o for o in output.splitlines()]
+    @unittest.skipIf(*SKIP_PYPY_WIN)
     def test_with_dde(self):
         """ check that dde links appear on stdout """
         filename = 'dde-test-from-office2003.doc.zip'
@@ -158,6 +174,7 @@ class TestDdeLinks(unittest.TestCase):
         self.assertEqual(len(self.get_dde_from_output(output)), 0,
                          msg='Found dde links in output of ' + filename)
+    @unittest.skipIf(*SKIP_PYPY_WIN)
     def test_with_dde_utf16le(self):
         """ check that dde links appear on stdout """
         filename = 'dde-test-from-office2013-utf_16le-korean.doc.zip'
@@ -179,6 +196,7 @@ class TestDdeLinks(unittest.TestCase):
                              msg='unexpected output for dde-test.{0}: {1}'
                                  .format(extn, output))
+    @unittest.skipIf(*SKIP_PYPY_WIN)
     def test_xml(self):
         """ check that dde in xml from word / excel is found """
         for filename in ('dde-in-excel2003.xml',
@@ -90,14 +90,14 @@ class TestOleIDBasic(unittest.TestCase):
         for filename, value_dict in self.oleids:
             # print('Debugging: testing file {0}'.format(filename))
             self.assertEqual(value_dict['ext_rels'],
-                             '/external_link/' in filename)
+                             os.sep + 'external_link' + os.sep in filename)
     def test_objectpool(self):
         """Test indicator for ObjectPool stream in ole files."""
         for filename, value_dict in self.oleids:
             # print('Debugging: testing file {0}'.format(filename))
-            if (filename.startswith('oleobj/sample_with_')
-                        or filename.startswith('oleobj/embedded')) \
+            if (filename.startswith(join('oleobj', 'sample_with_'))
+                        or filename.startswith(join('oleobj', 'embedded'))) \
                     and (filename.endswith('.doc') 
                          or filename.endswith('.dot')):
                 self.assertTrue(value_dict['ObjectPool'])
@@ -106,6 +106,15 @@ class TestOleIDBasic(unittest.TestCase):
     def test_macros(self):
         """Test indicator for macros."""
+        find_vba = (
+            join('ooxml', 'dde-in-excel2003.xml'),    # not really
+            join('encrypted', 'autostart-encrypt-standardpassword.xls'),
+            join('msodde', 'dde-in-csv.csv'),     # "Windows" "calc.exe"
+            join('msodde', 'dde-in-excel2003.xml'),   # same as above
+            join('oleform', 'oleform-PR314.docm'),
+            join('basic', 'empty'),                   # WTF?
+            join('basic', 'text'),
+        )
         for filename, value_dict in self.oleids:
             # TODO: we need a sample file with xlm macros
             before_dot, suffix = splitext(filename)
@@ -119,18 +128,10 @@ class TestOleIDBasic(unittest.TestCase):
             self.assertIn(value_dict['xlm'], ('Unknown', 'No'))
             # "macro detection" in text files leads to interesting results:
-            if filename in ('ooxml/dde-in-excel2003.xml',    # not really
-                            'encrypted/autostart-encrypt-standardpassword.xls',
-                            'msodde/dde-in-csv.csv',     # "Windows" "calc.exe"
-                            'msodde/dde-in-excel2003.xml',   # same as above
-                            'oleform/oleform-PR314.docm',
-                            'basic/empty',                   # WTF?
-                            'basic/text',                    # no macros!
-                            'olevba/sample_with_vba.ppt',
-                            ):
+            if filename in find_vba:                   # no macros!
                 self.assertEqual(value_dict['vba'], 'Yes')
             else:
-                self.assertEqual(value_dict['vba'], 'No')
+                self.assertIn(value_dict['vba'], ('No', 'Error'))
     def test_flash(self):
         """Test indicator for flash."""
@@ -2,17 +2,17 @@
 Test if oleid detects encrypted documents
 """
-import unittest, sys, os
-
-from tests.test_utils import DATA_BASE_DIR
+import unittest
 from os.path import join
+from tests.test_utils import DATA_BASE_DIR
 from oletools import oleid
+
 class TestEncryptedDocumentDetection(unittest.TestCase):
     def test_encrypted_document_detection(self):
         """ Run oleid and check if the document is flagged as encrypted """
-        filename = join(DATA_BASE_DIR, 'basic/encrypted.docx')
+        filename = join(DATA_BASE_DIR, 'basic', 'encrypted.docx')
         oleid_instance = oleid.OleID(filename)
         indicators = oleid_instance.check()
@@ -21,6 +21,7 @@ class TestEncryptedDocumentDetection(unittest.TestCase):
         self.assertEqual(is_encrypted, True)
+
 # just in case somebody calls this file as a script
 if __name__ == '__main__':
     unittest.main()
 \ No newline at end of file
@@ -120,10 +120,14 @@ class TestOlevbaBasic(unittest.TestCase):
                                                  args=[full_name, ] + ADD_ARGS,
                                                  accept_nonzero_exit=True)
             output = json.loads(out_str)
-            self.assertEqual(len(output), 2)
+            self.assertGreaterEqual(len(output), 2)
             self.assertEqual(output[0]['type'], 'MetaInformation')
             self.assertEqual(output[0]['script_name'], 'olevba')
-            result = output[1]
+            for entry in output[1:]:
+                if entry['type'] in ('msg', 'warning'):
+                    continue    # ignore messages
+                result = entry
+                break
             self.assertTrue(result['json_conversion_successful'])
             if suffix in ('.xlsb', '.xltm', '.xlsm'):
                 # TODO: cannot extract xlm macros for these types yet
@@ -40,7 +40,7 @@ class OlevbaCryptoWriteProtectTest(unittest.TestCase):
                                          exclude_stderr=True)
             data = json.loads(output, object_pairs_hook=OrderedDict)
             # debug: json.dump(data, sys.stdout, indent=4)
-            self.assertIn(len(data), (3, 4))
+            self.assertGreaterEqual(len(data), 3)
             # first 2 parts: general info about script and file
             self.assertIn('script_name', data[0])
@@ -53,22 +53,23 @@ class OlevbaCryptoWriteProtectTest(unittest.TestCase):
             self.assertEqual(data[1]['type'], 'OLE')
             self.assertTrue(data[1]['json_conversion_successful'])
-            # possible VBA stomping warning
-            if len(data) == 4:
-                self.assertEqual(data[2]['type'], 'msg')
-                self.assertIn('VBA stomping', data[2]['msg'])
+            for entry in data[2:]:
+                if entry['type'] in ('msg', 'warning'):
+                    continue
+                result = entry
+                break
             # last part is the actual result
-            self.assertEqual(data[-1]['container'], example_file)
-            self.assertNotEqual(data[-1]['file'], example_file)
-            self.assertEqual(data[-1]['type'], "OpenXML")
-            analysis = data[-1]['analysis']
+            self.assertEqual(result['container'], example_file)
+            self.assertNotEqual(result['file'], example_file)
+            self.assertEqual(result['type'], "OpenXML")
+            analysis = result['analysis']
             self.assertEqual(analysis[0]['type'], 'AutoExec')
             self.assertEqual(analysis[0]['keyword'], 'Auto_Open')
-            macros = data[-1]['macros']
+            macros = result['macros']
             self.assertEqual(macros[0]['vba_filename'], 'Modul1.bas')
             self.assertIn('Sub Auto_Open()', macros[0]['code'])
-            self.assertTrue(data[-1]['json_conversion_successful'])
+            self.assertTrue(result['json_conversion_successful'])
 if __name__ == '__main__':
@@ -111,8 +111,8 @@ class TestZipSubFile(unittest.TestCase):
         self.assertEqual(self.subfile.tell(), self.compare.tell())
         # seek backward (only implemented case: back to start)
-        self.subfile.seek(-self.subfile.tell(), os.SEEK_CUR)
-        self.compare.seek(-self.compare.tell(), os.SEEK_CUR)
+        self.subfile.seek(-1 * self.subfile.tell(), os.SEEK_CUR)
+        self.compare.seek(-1 * self.compare.tell(), os.SEEK_CUR)
         self.assertEqual(self.subfile.read(1), self.compare.read(1))
         self.assertEqual(self.subfile.tell(), self.compare.tell())
-import unittest, sys, os
-
+import unittest
+from os.path import join
 from tests.test_utils import testdata_reader
 from oletools import rtfobj
+
 class TestRtfObjIssue185(unittest.TestCase):
     def test_skip_space_after_bin_control_word(self):
-        data = testdata_reader.read_encrypted('rtfobj/issue_185.rtf.zip')
+        data = testdata_reader.read_encrypted(join('rtfobj', 'issue_185.rtf.zip'))
         rtfp = rtfobj.RtfObjParser(data)
         rtfp.parse()
         objects = rtfp.objects
         self.assertTrue(len(objects) == 1)
+
 if __name__ == '__main__':
     unittest.main()
-import unittest, sys, os
-
+import unittest
+from os.path import join
 from tests.test_utils import testdata_reader
 from oletools import rtfobj
+
 class TestRtfObjIssue251(unittest.TestCase):
     def test_bin_no_param(self):
-        data = testdata_reader.read('rtfobj/issue_251.rtf')
+        data = testdata_reader.read(join('rtfobj', 'issue_251.rtf'))
         rtfp = rtfobj.RtfObjParser(data)
         rtfp.parse()
         objects = rtfp.objects
         self.assertTrue(len(objects) == 1)
+
 if __name__ == '__main__':
     unittest.main()
@@ -100,6 +100,9 @@ def decrypt_sample(relpath):
     Code based on test_encoding_handler.temp_file().
+    Note: this causes problems if running with PyPy on Windows. The `unlink`
+          fails because the file is "still being used by another process".
+
     :param relpath: path inside `DATA_BASE_DIR`, should end in '.zip'
     :return: absolute path name to decrypted sample.
     """