# testdata_reader.py 2.58 KB
"""
Helper functions to deal with zip-encrypted test files.

Some test samples triggered antivirus alerts when installing oletools. Those
samples were therefore "hidden" in encrypted zip files. These functions help
to use them.
"""

import os, sys, zipfile
from os.path import dirname, abspath, normpath, relpath, join, basename
from . import DATA_BASE_DIR

# Password used to encrypt problematic test samples inside a zip container
ENCRYPTED_FILES_PASSWORD = 'infected-test'

# Pick an is_zipfile implementation that works on old and new python versions
if sys.version_info[0] > 2:
    # Python 3.x+: zipfile wants the password as bytes, so convert it once here
    from zipfile import is_zipfile
    ENCRYPTED_FILES_PASSWORD = ENCRYPTED_FILES_PASSWORD.encode()
elif sys.version_info[1] > 6:
    # Python 2.7
    from zipfile import is_zipfile
else:
    # Python 2.6 (or older): use is_zipfile backported from Python 2.7
    from thirdparty.zipfile27 import is_zipfile


def read(relative_path):
    """
    Return the raw contents of a plain (not encrypted) test data file.

    :arg str relative_path: path of the file, relative to the test data base
                            dir (resolved through `get_path_from_root`)
    :returns: complete file contents, read in binary mode

    see also: `read_encrypted`.
    """
    sample_path = get_path_from_root(relative_path)
    with open(sample_path, 'rb') as sample:
        data = sample.read()
    return data


def read_encrypted(relative_path, filename=None):
    """
    Return contents of encrypted file inside test data dir.

    Opens the zip archive found at `relative_path` and decrypts one of its
    members using `ENCRYPTED_FILES_PASSWORD`.

    :arg str relative_path: path of the zip archive, relative to the test data
                            base dir (resolved through `get_path_from_root`)
    :arg filename: name of the archive member to read; if `None` (default),
                   the first member listed in the archive is read
    :returns: decrypted contents of the selected archive member
    :raises IndexError: if `filename` is `None` and the archive has no members

    see also: `read`.
    """
    archive = zipfile.ZipFile(get_path_from_root(relative_path))

    # try/finally rather than `with`: ZipFile is not a context manager on the
    # oldest python versions this module still caters for. This guarantees the
    # archive is closed even if listing or decrypting a member fails.
    try:
        if filename is None:
            filename = archive.namelist()[0]
        return archive.read(filename, pwd=ENCRYPTED_FILES_PASSWORD)
    finally:
        archive.close()


def get_path_from_root(relative_path):
    """
    Convert path relative to test data base dir to a path below that dir.

    The result is `relative_path` joined onto `DATA_BASE_DIR`, so it is only
    absolute if `DATA_BASE_DIR` itself is absolute.

    :arg str relative_path: path relative to `DATA_BASE_DIR`
    :returns: the joined path
    """
    full_path = os.path.join(DATA_BASE_DIR, relative_path)
    return full_path


def loop_over_files(subdir=''):
    """
    Iterate over all test samples, decrypting problematic files on the fly.

    Walks (using `os.walk`) through the whole test data dir or just the given
    subdir of it. For every file found, yields a 2-tuple consisting of the
    path of the file relative to `DATA_BASE_DIR` and the contents of the file.
    Files whose name ends with .zip are handed to `read_encrypted`, all other
    files are returned unmodified through `read`.

    :arg str subdir: Optional subdir of test data dir that caller is interested
                     in
    """
    walk_root = join(DATA_BASE_DIR, subdir)
    for current_dir, _, names in os.walk(walk_root):
        for name in names:
            sample_path = relpath(join(current_dir, name), DATA_BASE_DIR)
            # zip archives hold the encrypted samples, everything else is plain
            reader = read_encrypted if name.endswith('.zip') else read
            yield sample_path, reader(sample_path)