Commit 88c2b0a0217e0f5844d6ad078fa1da265dcafbcb
Committed by
GitHub
Merge pull request #403 from christian-intra2net/crypto-write-protect
Integrate decrypt from msoffcrypto-tools
Showing
23 changed files
with
754 additions
and
228 deletions
.travis.yml
oletools/common/errors.py
| ... | ... | @@ -4,10 +4,42 @@ Errors used in several tools to avoid duplication |
| 4 | 4 | .. codeauthor:: Intra2net AG <info@intra2net.com> |
| 5 | 5 | """ |
| 6 | 6 | |
| 7 | -class FileIsEncryptedError(ValueError): | |
| 7 | +class CryptoErrorBase(ValueError): | |
| 8 | + """Base class for crypto-based exceptions.""" | |
| 9 | + pass | |
| 10 | + | |
| 11 | + | |
| 12 | +class CryptoLibNotImported(CryptoErrorBase, ImportError): | |
| 13 | + """Exception thrown if msoffcrypto is needed but could not be imported.""" | |
| 14 | + | |
| 15 | + def __init__(self): | |
| 16 | + super(CryptoLibNotImported, self).__init__( | |
| 17 | + 'msoffcrypto-tools could not be imported') | |
| 18 | + | |
| 19 | + | |
| 20 | +class UnsupportedEncryptionError(CryptoErrorBase): | |
| 8 | 21 | """Exception thrown if file is encrypted and cannot deal with it.""" |
| 9 | - # see also: same class in olevba[3] and record_base | |
| 10 | 22 | def __init__(self, filename=None): |
| 11 | - super(FileIsEncryptedError, self).__init__( | |
| 23 | + super(UnsupportedEncryptionError, self).__init__( | |
| 12 | 24 | 'Office file {}is encrypted, not yet supported' |
| 13 | 25 | .format('' if filename is None else filename + ' ')) |
| 26 | + | |
| 27 | + | |
| 28 | +class WrongEncryptionPassword(CryptoErrorBase): | |
| 29 | + """Exception thrown if encryption could be handled but passwords wrong.""" | |
| 30 | + def __init__(self, filename=None): | |
| 31 | + super(WrongEncryptionPassword, self).__init__( | |
| 32 | + 'Given passwords could not decrypt office file{}' | |
| 33 | + .format('' if filename is None else ' ' + filename)) | |
| 34 | + | |
| 35 | + | |
| 36 | +class MaxCryptoNestingReached(CryptoErrorBase): | |
| 37 | + """ | |
| 38 | + Exception thrown if decryption is too deeply layered. | |
| 39 | + | |
| 40 | + (...or decrypt code creates inf loop) | |
| 41 | + """ | |
| 42 | + def __init__(self, n_layers, filename=None): | |
| 43 | + super(MaxCryptoNestingReached, self).__init__( | |
| 44 | + 'Encountered more than {} layers of encryption for office file{}' | |
| 45 | + .format(n_layers, '' if filename is None else ' ' + filename)) | ... | ... |
oletools/crypto.py
0 → 100644
| 1 | +#!/usr/bin/env python | |
| 2 | +""" | |
| 3 | +crypto.py | |
| 4 | + | |
| 5 | +Module to be used by other scripts and modules in oletools, that provides | |
| 6 | +information on encryption in OLE files. | |
| 7 | + | |
| 8 | +Uses :py:mod:`msoffcrypto-tool` to decrypt if it is available. Otherwise | |
| 9 | +decryption will fail with an ImportError. | |
| 10 | + | |
| 11 | +Encryption/Write-Protection can be realized in many different ways. They range | |
| 12 | +from setting a single flag in an otherwise unprotected file to embedding a | |
| 13 | +regular file (e.g. xlsx) in an EncryptedStream inside an OLE file. That means | |
| 14 | +that (1) lots of bad things are accessible even if no encryption password | |
| 15 | +is known, and (2) even basic attributes like the file type can change by | |
| 16 | +decryption. Therefore I suggest the following general routine to deal with | |
| 17 | +potentially encrypted files:: | |
| 18 | + | |
| 19 | + def script_main_function(input_file, passwords, crypto_nesting=0, args): | |
| 20 | + '''Wrapper around main function to deal with encrypted files.''' | |
| 21 | + initial_stuff(input_file, args) | |
| 22 | + result = None | |
| 23 | + try: | |
| 24 | + result = do_your_thing_assuming_no_encryption(input_file) | |
| 25 | + if not crypto.is_encrypted(input_file): | |
| 26 | + return result | |
| 27 | + except Exception: | |
| 28 | + if not crypto.is_encrypted(input_file): | |
| 29 | + raise | |
| 30 | + # we reach this point only if file is encrypted | |
| 31 | + # check if this is an encrypted file in an encrypted file in an ... | |
| 32 | + if crypto_nesting >= crypto.MAX_NESTING_DEPTH: | |
| 33 | + raise crypto.MaxCryptoNestingReached(crypto_nesting, input_file) | |
| 34 | + decrypted_file = None | |
| 35 | + try: | |
| 36 | + decrypted_file = crypto.decrypt(input_file, passwords) | |
| 37 | + # might still be encrypted, so call this again recursively | |
| 38 | + result = script_main_function(decrypted_file, passwords, | |
| 39 | + crypto_nesting+1, args) | |
| 40 | + except Exception: | |
| 41 | + raise | |
| 42 | + finally: # clean up | |
| 43 | + try: # (maybe file was not yet created) | |
| 44 | + os.unlink(decrypted_file) | |
| 45 | + except Exception: | |
| 46 | + pass | |
| 47 | + | |
| 48 | +(Realized e.g. in :py:mod:`oletools.msodde`). | |
| 49 | +That means that caller code needs another wrapper around its main function. I | |
| 50 | +did try it another way first (a transparent on-demand unencrypt) but for the | |
| 51 | +above reasons I believe this is the better way. Also, non-top-level-code can | |
| 52 | +just assume that it works on unencrypted data and fail with an exception if | |
| 53 | +encrypted data makes its work impossible. No need to check `if is_encrypted()` | |
| 54 | +at the start of functions. | |
| 55 | + | |
| 56 | +.. seealso:: [MS-OFFCRYPTO] | |
| 57 | +.. seealso:: https://github.com/nolze/msoffcrypto-tool | |
| 58 | + | |
| 59 | +crypto is part of the python-oletools package: | |
| 60 | +http://www.decalage.info/python/oletools | |
| 61 | +""" | |
| 62 | + | |
| 63 | +# === LICENSE ================================================================= | |
| 64 | + | |
| 65 | +# crypto is copyright (c) 2014-2019 Philippe Lagadec (http://www.decalage.info) | |
| 66 | +# All rights reserved. | |
| 67 | +# | |
| 68 | +# Redistribution and use in source and binary forms, with or without | |
| 69 | +# modification, are permitted provided that the following conditions are met: | |
| 70 | +# | |
| 71 | +# * Redistributions of source code must retain the above copyright notice, | |
| 72 | +# this list of conditions and the following disclaimer. | |
| 73 | +# * Redistributions in binary form must reproduce the above copyright notice, | |
| 74 | +# this list of conditions and the following disclaimer in the documentation | |
| 75 | +# and/or other materials provided with the distribution. | |
| 76 | +# | |
| 77 | +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |
| 78 | +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
| 79 | +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
| 80 | +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE | |
| 81 | +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | |
| 82 | +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | |
| 83 | +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | |
| 84 | +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | |
| 85 | +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |
| 86 | +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | |
| 87 | +# POSSIBILITY OF SUCH DAMAGE. | |
| 88 | + | |
| 89 | +# ----------------------------------------------------------------------------- | |
| 90 | +# CHANGELOG: | |
| 91 | +# 2019-02-14 v0.01 CH: - first version with encryption check from oleid | |
| 92 | + | |
| 93 | +__version__ = '0.01' | |
| 94 | + | |
| 95 | +import sys | |
| 96 | +import struct | |
| 97 | +import os | |
| 98 | +from os.path import splitext, isfile | |
| 99 | +from tempfile import mkstemp | |
| 100 | +import zipfile | |
| 101 | +from oletools.common.errors import CryptoErrorBase, WrongEncryptionPassword, \ | |
| 102 | + UnsupportedEncryptionError, MaxCryptoNestingReached, CryptoLibNotImported | |
| 103 | +from olefile import OleFileIO | |
| 104 | + | |
| 105 | +try: | |
| 106 | + import msoffcrypto | |
| 107 | +except ImportError: | |
| 108 | + msoffcrypto = None | |
| 109 | + | |
| 110 | + | |
| 111 | +#: if there is an encrypted file embedded in an encrypted file, | |
| 112 | +#: how deep down do we go | |
| 113 | +MAX_NESTING_DEPTH = 10 | |
| 114 | + | |
| 115 | + | |
| 116 | +def is_encrypted(some_file): | |
| 117 | + """ | |
| 118 | + Determine whether document contains encrypted content. | |
| 119 | + | |
| 120 | + This should return False for documents that are just write-protected or | |
| 121 | + signed or finalized. It should return True if ANY content of the file is | |
| 122 | + encrypted and can therefore not be analyzed by other oletools modules | |
| 123 | + without being given a password. | |
| 124 | + | |
| 125 | + Exception: there are ways to write-protect an office document by embedding | |
| 126 | + it as encrypted stream with hard-coded standard password into an otherwise | |
| 127 | + empty OLE file. From an office user point of view, this is no encryption, | |
| 128 | + but regarding file structure this is encryption, so we return `True` for | |
| 129 | + these. | |
| 130 | + | |
| 131 | + This should not raise exceptions needlessly. | |
| 132 | + | |
| 133 | + This implementation is rather simple: it returns True if the file contains | |
| 134 | + streams with typical encryption names (c.f. [MS-OFFCRYPTO]). It does not | |
| 135 | + test whether these streams actually contain data or whether the ole file | |
| 136 | + structure contains the necessary references to these. It also checks the | |
| 137 | + "well-known property" PIDSI_DOC_SECURITY if the SummaryInformation stream | |
| 138 | + is accessible (c.f. [MS-OLEPS] 2.25.1) | |
| 139 | + | |
| 140 | + :param some_file: File name or an opened OleFileIO | |
| 141 | + :type some_file: :py:class:`olefile.OleFileIO` or `str` | |
| 142 | + :returns: True if (and only if) the file contains encrypted content | |
| 143 | + """ | |
| 144 | + if not isinstance(some_file, str): | |
| 145 | + return is_encrypted_ole(some_file) # assume it is OleFileIO | |
| 146 | + if zipfile.is_zipfile(some_file): | |
| 147 | + return is_encrypted_zip(some_file) | |
| 148 | + # otherwise assume it is the name of an ole file | |
| 149 | + return is_encrypted_ole(OleFileIO(some_file)) | |
| 150 | + | |
| 151 | + | |
| 152 | +def is_encrypted_zip(filename): | |
| 153 | + """Specialization of :py:func:`is_encrypted` for zip-based files.""" | |
| 154 | + # try to decrypt a few bytes from first entry | |
| 155 | + with zipfile.ZipFile(filename, 'r') as zipper: | |
| 156 | + first_entry = zipper.infolist()[0] | |
| 157 | + try: | |
| 158 | + with zipper.open(first_entry, 'r') as reader: | |
| 159 | + reader.read(min(16, first_entry.file_size)) | |
| 160 | + return False | |
| 161 | + except RuntimeError as rt_err: | |
| 162 | + return 'crypt' in str(rt_err) | |
| 163 | + | |
| 164 | + | |
| 165 | +def is_encrypted_ole(ole): | |
| 166 | + """Specialization of :py:func:`is_encrypted` for ole files.""" | |
| 167 | + # check well known property for password protection | |
| 168 | + # (this field may be missing for Powerpoint2000, for example) | |
| 169 | + # TODO: check whether password protection always implies encryption. Could | |
| 170 | + # write-protection or signing with password trigger this as well? | |
| 171 | + if ole.exists("\x05SummaryInformation"): | |
| 172 | + suminfo_data = ole.getproperties("\x05SummaryInformation") | |
| 173 | + if 0x13 in suminfo_data and (suminfo_data[0x13] & 1): | |
| 174 | + return True | |
| 175 | + | |
| 176 | + # check a few stream names | |
| 177 | + # TODO: check whether these actually contain data and whether other | |
| 178 | + # necessary properties exist / are set | |
| 179 | + elif ole.exists('EncryptionInfo'): | |
| 180 | + return True | |
| 181 | + # or an encrypted ppt file | |
| 182 | + elif ole.exists('EncryptedSummary') and \ | |
| 183 | + not ole.exists('SummaryInformation'): | |
| 184 | + return True | |
| 185 | + | |
| 186 | + # Word-specific old encryption: | |
| 187 | + if ole.exists('WordDocument'): | |
| 188 | + # check for Word-specific encryption flag: | |
| 189 | + stream = None | |
| 190 | + try: | |
| 191 | + stream = ole.openstream(["WordDocument"]) | |
| 192 | + # pass header 10 bytes | |
| 193 | + stream.read(10) | |
| 194 | + # read flag structure: | |
| 195 | + temp16 = struct.unpack("H", stream.read(2))[0] | |
| 196 | + f_encrypted = (temp16 & 0x0100) >> 8 | |
| 197 | + if f_encrypted: | |
| 198 | + return True | |
| 199 | + except Exception: | |
| 200 | + raise | |
| 201 | + finally: | |
| 202 | + if stream is not None: | |
| 203 | + stream.close() | |
| 204 | + | |
| 205 | + # no indication of encryption | |
| 206 | + return False | |
| 207 | + | |
| 208 | + | |
| 209 | +#: one way to achieve "write protection" in office files is to encrypt the file | |
| 210 | +#: using this password | |
| 211 | +WRITE_PROTECT_ENCRYPTION_PASSWORD = 'VelvetSweatshop' | |
| 212 | + | |
| 213 | + | |
| 214 | +def _check_msoffcrypto(): | |
| 215 | + """Raise a :py:class:`CryptoLibNotImported` if msoffcrypto not imported.""" | |
| 216 | + if msoffcrypto is None: | |
| 217 | + raise CryptoLibNotImported() | |
| 218 | + | |
| 219 | + | |
| 220 | +def check_msoffcrypto(): | |
| 221 | + """Return `True` iff :py:mod:`msoffcrypto` could be imported.""" | |
| 222 | + return msoffcrypto is not None | |
| 223 | + | |
| 224 | + | |
| 225 | +def decrypt(filename, passwords=None, **temp_file_args): | |
| 226 | + """ | |
| 227 | + Try to decrypt an encrypted file | |
| 228 | + | |
| 229 | + This function tries to decrypt the given file using a given set of | |
| 230 | + passwords. If no password is given, tries the standard password for write | |
| 231 | + protection. Creates a file with decrypted data whose file name is returned. | |
| 232 | + If the decryption fails, None is returned. | |
| 233 | + | |
| 234 | + :param str filename: path to an ole file on disc | |
| 235 | + :param passwords: list/set/tuple/... of passwords or a single password or | |
| 236 | + None | |
| 237 | + :type passwords: iterable or str or None | |
| 238 | + :param temp_file_args: arguments for :py:func:`tempfile.mkstemp` e.g., | |
| 239 | + `dir` or `prefix`. `suffix` will default to | |
| 240 | + suffix of input `filename`, `prefix` defaults to | |
| 241 | + `oletools-decrypt-`; `text` will be ignored | |
| 242 | + :returns: name of the decrypted temporary file. | |
| 243 | + :raises: :py:class:`ImportError` if :py:mod:`msoffcrypto-tool` not found | |
| 244 | + :raises: :py:class:`ValueError` if the given file is not encrypted | |
| 245 | + """ | |
| 246 | + _check_msoffcrypto() | |
| 247 | + | |
| 248 | + # normalize password so we always have a list/tuple | |
| 249 | + if isinstance(passwords, str): | |
| 250 | + passwords = (passwords, ) | |
| 251 | + elif not passwords: | |
| 252 | + passwords = (WRITE_PROTECT_ENCRYPTION_PASSWORD, ) | |
| 253 | + | |
| 254 | + # check temp file args | |
| 255 | + if 'prefix' not in temp_file_args: | |
| 256 | + temp_file_args['prefix'] = 'oletools-decrypt-' | |
| 257 | + if 'suffix' not in temp_file_args: | |
| 258 | + temp_file_args['suffix'] = splitext(filename)[1] | |
| 259 | + temp_file_args['text'] = False | |
| 260 | + | |
| 261 | + decrypt_file = None | |
| 262 | + with open(filename, 'rb') as reader: | |
| 263 | + try: | |
| 264 | + crypto_file = msoffcrypto.OfficeFile(reader) | |
| 265 | + except Exception as exc: # e.g. ppt, not yet supported by msoffcrypto | |
| 266 | + if 'Unrecognized file format' in str(exc): | |
| 267 | + # raise different exception without stack trace of original exc | |
| 268 | + if sys.version_info.major == 2: | |
| 269 | + raise UnsupportedEncryptionError(filename) | |
| 270 | + else: | |
| 271 | + # this is a syntax error in python 2, so wrap it in exec() | |
| 272 | + exec('raise UnsupportedEncryptionError(filename) from None') | |
| 273 | + else: | |
| 274 | + raise | |
| 275 | + if not crypto_file.is_encrypted(): | |
| 276 | + raise ValueError('Given input file {} is not encrypted!' | |
| 277 | + .format(filename)) | |
| 278 | + | |
| 279 | + for password in passwords: | |
| 280 | + write_descriptor = None | |
| 281 | + write_handle = None | |
| 282 | + decrypt_file = None | |
| 283 | + try: | |
| 284 | + crypto_file.load_key(password=password) | |
| 285 | + | |
| 286 | + # create temp file | |
| 287 | + write_descriptor, decrypt_file = mkstemp(**temp_file_args) | |
| 288 | + write_handle = os.fdopen(write_descriptor, 'wb') | |
| 289 | + write_descriptor = None # is now handled via write_handle | |
| 290 | + crypto_file.decrypt(write_handle) | |
| 291 | + | |
| 292 | + # decryption was successful; clean up and return | |
| 293 | + write_handle.close() | |
| 294 | + write_handle = None | |
| 295 | + break | |
| 296 | + except Exception: | |
| 297 | + # error-clean up: close everything and del temp file | |
| 298 | + if write_handle: | |
| 299 | + write_handle.close() | |
| 300 | + elif write_descriptor: | |
| 301 | + os.close(write_descriptor) | |
| 302 | + if decrypt_file and isfile(decrypt_file): | |
| 303 | + os.unlink(decrypt_file) | |
| 304 | + decrypt_file = None | |
| 305 | + # decrypt_file is still None here if no password could decrypt the file | |
| 306 | + return decrypt_file | ... | ... |
oletools/msodde.py
| ... | ... | @@ -11,7 +11,6 @@ Supported formats: |
| 11 | 11 | - RTF |
| 12 | 12 | - CSV (exported from / imported into Excel) |
| 13 | 13 | - XML (exported from Word 2003, Word 2007+, Excel 2003, (Excel 2007+?) |
| 14 | -- raises an error if run with files encrypted using MS Crypto API RC4 | |
| 15 | 14 | |
| 16 | 15 | Author: Philippe Lagadec - http://www.decalage.info |
| 17 | 16 | License: BSD, see source code or documentation |
| ... | ... | @@ -52,7 +51,6 @@ from __future__ import print_function |
| 52 | 51 | |
| 53 | 52 | import argparse |
| 54 | 53 | import os |
| 55 | -from os.path import abspath, dirname | |
| 56 | 54 | import sys |
| 57 | 55 | import re |
| 58 | 56 | import csv |
| ... | ... | @@ -62,9 +60,9 @@ import olefile |
| 62 | 60 | from oletools import ooxml |
| 63 | 61 | from oletools import xls_parser |
| 64 | 62 | from oletools import rtfobj |
| 65 | -from oletools import oleid | |
| 63 | +from oletools.ppt_record_parser import is_ppt | |
| 64 | +from oletools import crypto | |
| 66 | 65 | from oletools.common.log_helper import log_helper |
| 67 | -from oletools.common.errors import FileIsEncryptedError | |
| 68 | 66 | |
| 69 | 67 | # ----------------------------------------------------------------------------- |
| 70 | 68 | # CHANGELOG: |
| ... | ... | @@ -305,6 +303,9 @@ def process_args(cmd_line_args=None): |
| 305 | 303 | default=DEFAULT_LOG_LEVEL, |
| 306 | 304 | help="logging level debug/info/warning/error/critical " |
| 307 | 305 | "(default=%(default)s)") |
| 306 | + parser.add_argument("-p", "--password", type=str, action='append', | |
| 307 | + help='if encrypted office files are encountered, try ' | |
| 308 | + 'decryption with this password. May be repeated.') | |
| 308 | 309 | filter_group = parser.add_argument_group( |
| 309 | 310 | title='Filter which OpenXML field commands are returned', |
| 310 | 311 | description='Only applies to OpenXML (e.g. docx) and rtf, not to OLE ' |
| ... | ... | @@ -352,10 +353,9 @@ def process_doc_field(data): |
| 352 | 353 | |
| 353 | 354 | if data.lstrip().lower().startswith(u'dde'): |
| 354 | 355 | return data |
| 355 | - elif data.lstrip().lower().startswith(u'\x00d\x00d\x00e\x00'): | |
| 356 | + if data.lstrip().lower().startswith(u'\x00d\x00d\x00e\x00'): | |
| 356 | 357 | return data |
| 357 | - else: | |
| 358 | - return u'' | |
| 358 | + return u'' | |
| 359 | 359 | |
| 360 | 360 | |
| 361 | 361 | OLE_FIELD_START = 0x13 |
| ... | ... | @@ -379,7 +379,7 @@ def process_doc_stream(stream): |
| 379 | 379 | while True: |
| 380 | 380 | idx += 1 |
| 381 | 381 | char = stream.read(1) # loop over every single byte |
| 382 | - if len(char) == 0: | |
| 382 | + if len(char) == 0: # pylint: disable=len-as-condition | |
| 383 | 383 | break |
| 384 | 384 | else: |
| 385 | 385 | char = ord(char) |
| ... | ... | @@ -417,7 +417,7 @@ def process_doc_stream(stream): |
| 417 | 417 | pass |
| 418 | 418 | elif len(field_contents) > OLE_FIELD_MAX_SIZE: |
| 419 | 419 | logger.debug('field exceeds max size of {0}. Ignore rest' |
| 420 | - .format(OLE_FIELD_MAX_SIZE)) | |
| 420 | + .format(OLE_FIELD_MAX_SIZE)) | |
| 421 | 421 | max_size_exceeded = True |
| 422 | 422 | |
| 423 | 423 | # appending a raw byte to a unicode string here. Not clean but |
| ... | ... | @@ -437,7 +437,7 @@ def process_doc_stream(stream): |
| 437 | 437 | logger.debug('big field was not a field after all') |
| 438 | 438 | |
| 439 | 439 | logger.debug('Checked {0} characters, found {1} fields' |
| 440 | - .format(idx, len(result_parts))) | |
| 440 | + .format(idx, len(result_parts))) | |
| 441 | 441 | |
| 442 | 442 | return result_parts |
| 443 | 443 | |
| ... | ... | @@ -462,11 +462,10 @@ def process_doc(ole): |
| 462 | 462 | direntry = ole._load_direntry(sid) |
| 463 | 463 | is_stream = direntry.entry_type == olefile.STGTY_STREAM |
| 464 | 464 | logger.debug('direntry {:2d} {}: {}' |
| 465 | - .format(sid, '[orphan]' if is_orphan else direntry.name, | |
| 466 | - 'is stream of size {}'.format(direntry.size) | |
| 467 | - if is_stream else | |
| 468 | - 'no stream ({})' | |
| 469 | - .format(direntry.entry_type))) | |
| 465 | + .format(sid, '[orphan]' if is_orphan else direntry.name, | |
| 466 | + 'is stream of size {}'.format(direntry.size) | |
| 467 | + if is_stream else | |
| 468 | + 'no stream ({})'.format(direntry.entry_type))) | |
| 470 | 469 | if is_stream: |
| 471 | 470 | new_parts = process_doc_stream( |
| 472 | 471 | ole._open(direntry.isectStart, direntry.size)) |
| ... | ... | @@ -525,7 +524,8 @@ def process_docx(filepath, field_filter_mode=None): |
| 525 | 524 | else: |
| 526 | 525 | elem = curr_elem |
| 527 | 526 | if elem is None: |
| 528 | - raise BadOOXML(filepath, 'Got "None"-Element from iter_xml') | |
| 527 | + raise ooxml.BadOOXML(filepath, | |
| 528 | + 'Got "None"-Element from iter_xml') | |
| 529 | 529 | |
| 530 | 530 | # check if FLDCHARTYPE and whether "begin" or "end" tag |
| 531 | 531 | attrib_type = elem.attrib.get(ATTR_W_FLDCHARTYPE[0]) or \ |
| ... | ... | @@ -535,7 +535,7 @@ def process_docx(filepath, field_filter_mode=None): |
| 535 | 535 | level += 1 |
| 536 | 536 | if attrib_type == "end": |
| 537 | 537 | level -= 1 |
| 538 | - if level == 0 or level == -1: # edge-case; level gets -1 | |
| 538 | + if level in (0, -1): # edge-case; level gets -1 | |
| 539 | 539 | all_fields.append(ddetext) |
| 540 | 540 | ddetext = u'' |
| 541 | 541 | level = 0 # reset edge-case |
| ... | ... | @@ -564,6 +564,7 @@ def process_docx(filepath, field_filter_mode=None): |
| 564 | 564 | |
| 565 | 565 | |
| 566 | 566 | def unquote(field): |
| 567 | + """TODO: document what exactly is happening here...""" | |
| 567 | 568 | if "QUOTE" not in field or NO_QUOTES: |
| 568 | 569 | return field |
| 569 | 570 | # split into components |
| ... | ... | @@ -606,7 +607,7 @@ def field_is_blacklisted(contents): |
| 606 | 607 | except ValueError: # first word is no blacklisted command |
| 607 | 608 | return False |
| 608 | 609 | logger.debug('trying to match "{0}" to blacklist command {1}' |
| 609 | - .format(contents, FIELD_BLACKLIST[index])) | |
| 610 | + .format(contents, FIELD_BLACKLIST[index])) | |
| 610 | 611 | _, nargs_required, nargs_optional, sw_with_arg, sw_solo, sw_format \ |
| 611 | 612 | = FIELD_BLACKLIST[index] |
| 612 | 613 | |
| ... | ... | @@ -618,11 +619,12 @@ def field_is_blacklisted(contents): |
| 618 | 619 | nargs += 1 |
| 619 | 620 | if nargs < nargs_required: |
| 620 | 621 | logger.debug('too few args: found {0}, but need at least {1} in "{2}"' |
| 621 | - .format(nargs, nargs_required, contents)) | |
| 622 | + .format(nargs, nargs_required, contents)) | |
| 622 | 623 | return False |
| 623 | - elif nargs > nargs_required + nargs_optional: | |
| 624 | - logger.debug('too many args: found {0}, but need at most {1}+{2} in "{3}"' | |
| 625 | - .format(nargs, nargs_required, nargs_optional, contents)) | |
| 624 | + if nargs > nargs_required + nargs_optional: | |
| 625 | + logger.debug('too many args: found {0}, but need at most {1}+{2} in ' | |
| 626 | + '"{3}"' | |
| 627 | + .format(nargs, nargs_required, nargs_optional, contents)) | |
| 626 | 628 | return False |
| 627 | 629 | |
| 628 | 630 | # check switches |
| ... | ... | @@ -632,14 +634,14 @@ def field_is_blacklisted(contents): |
| 632 | 634 | if expect_arg: # this is an argument for the last switch |
| 633 | 635 | if arg_choices and (word not in arg_choices): |
| 634 | 636 | logger.debug('Found invalid switch argument "{0}" in "{1}"' |
| 635 | - .format(word, contents)) | |
| 637 | + .format(word, contents)) | |
| 636 | 638 | return False |
| 637 | 639 | expect_arg = False |
| 638 | 640 | arg_choices = [] # in general, do not enforce choices |
| 639 | 641 | continue # "no further questions, your honor" |
| 640 | 642 | elif not FIELD_SWITCH_REGEX.match(word): |
| 641 | 643 | logger.debug('expected switch, found "{0}" in "{1}"' |
| 642 | - .format(word, contents)) | |
| 644 | + .format(word, contents)) | |
| 643 | 645 | return False |
| 644 | 646 | # we want a switch and we got a valid one |
| 645 | 647 | switch = word[1] |
| ... | ... | @@ -661,7 +663,7 @@ def field_is_blacklisted(contents): |
| 661 | 663 | arg_choices = [] # too many choices to list them here |
| 662 | 664 | else: |
| 663 | 665 | logger.debug('unexpected switch {0} in "{1}"' |
| 664 | - .format(switch, contents)) | |
| 666 | + .format(switch, contents)) | |
| 665 | 667 | return False |
| 666 | 668 | |
| 667 | 669 | # if nothing went wrong sofar, the contents seems to match the blacklist |
| ... | ... | @@ -676,7 +678,7 @@ def process_xlsx(filepath): |
| 676 | 678 | tag = elem.tag.lower() |
| 677 | 679 | if tag == 'ddelink' or tag.endswith('}ddelink'): |
| 678 | 680 | # we have found a dde link. Try to get more info about it |
| 679 | - link_info = ['DDE-Link'] | |
| 681 | + link_info = [] | |
| 680 | 682 | if 'ddeService' in elem.attrib: |
| 681 | 683 | link_info.append(elem.attrib['ddeService']) |
| 682 | 684 | if 'ddeTopic' in elem.attrib: |
| ... | ... | @@ -687,16 +689,15 @@ def process_xlsx(filepath): |
| 687 | 689 | for subfile, content_type, handle in parser.iter_non_xml(): |
| 688 | 690 | try: |
| 689 | 691 | logger.info('Parsing non-xml subfile {0} with content type {1}' |
| 690 | - .format(subfile, content_type)) | |
| 692 | + .format(subfile, content_type)) | |
| 691 | 693 | for record in xls_parser.parse_xlsb_part(handle, content_type, |
| 692 | 694 | subfile): |
| 693 | 695 | logger.debug('{0}: {1}'.format(subfile, record)) |
| 694 | 696 | if isinstance(record, xls_parser.XlsbBeginSupBook) and \ |
| 695 | 697 | record.link_type == \ |
| 696 | 698 | xls_parser.XlsbBeginSupBook.LINK_TYPE_DDE: |
| 697 | - dde_links.append('DDE-Link ' + record.string1 + ' ' + | |
| 698 | - record.string2) | |
| 699 | - except Exception: | |
| 699 | + dde_links.append(record.string1 + ' ' + record.string2) | |
| 700 | + except Exception as exc: | |
| 700 | 701 | if content_type.startswith('application/vnd.ms-excel.') or \ |
| 701 | 702 | content_type.startswith('application/vnd.ms-office.'): # pylint: disable=bad-indentation |
| 702 | 703 | # should really be able to parse these either as xml or records |
| ... | ... | @@ -727,7 +728,8 @@ class RtfFieldParser(rtfobj.RtfParser): |
| 727 | 728 | |
| 728 | 729 | def open_destination(self, destination): |
| 729 | 730 | if destination.cword == b'fldinst': |
| 730 | - logger.debug('*** Start field data at index %Xh' % destination.start) | |
| 731 | + logger.debug('*** Start field data at index %Xh' | |
| 732 | + % destination.start) | |
| 731 | 733 | |
| 732 | 734 | def close_destination(self, destination): |
| 733 | 735 | if destination.cword == b'fldinst': |
| ... | ... | @@ -758,7 +760,7 @@ def process_rtf(file_handle, field_filter_mode=None): |
| 758 | 760 | all_fields = [field.decode('ascii') for field in rtfparser.fields] |
| 759 | 761 | # apply field command filter |
| 760 | 762 | logger.debug('found {1} fields, filtering with mode "{0}"' |
| 761 | - .format(field_filter_mode, len(all_fields))) | |
| 763 | + .format(field_filter_mode, len(all_fields))) | |
| 762 | 764 | if field_filter_mode in (FIELD_FILTER_ALL, None): |
| 763 | 765 | clean_fields = all_fields |
| 764 | 766 | elif field_filter_mode == FIELD_FILTER_DDE: |
| ... | ... | @@ -815,11 +817,12 @@ def process_csv(filepath): |
| 815 | 817 | results, _ = process_csv_dialect(file_handle, delim) |
| 816 | 818 | except csv.Error: # e.g. sniffing fails |
| 817 | 819 | logger.debug('failed to csv-parse with delimiter {0!r}' |
| 818 | - .format(delim)) | |
| 820 | + .format(delim)) | |
| 819 | 821 | |
| 820 | 822 | if is_small and not results: |
| 821 | 823 | # try whole file as single cell, since sniffing fails in this case |
| 822 | - logger.debug('last attempt: take whole file as single unquoted cell') | |
| 824 | + logger.debug('last attempt: take whole file as single unquoted ' | |
| 825 | + 'cell') | |
| 823 | 826 | file_handle.seek(0) |
| 824 | 827 | match = CSV_DDE_FORMAT.match(file_handle.read(CSV_SMALL_THRESH)) |
| 825 | 828 | if match: |
| ... | ... | @@ -836,8 +839,8 @@ def process_csv_dialect(file_handle, delimiters): |
| 836 | 839 | delimiters=delimiters) |
| 837 | 840 | dialect.strict = False # microsoft is never strict |
| 838 | 841 | logger.debug('sniffed csv dialect with delimiter {0!r} ' |
| 839 | - 'and quote char {1!r}' | |
| 840 | - .format(dialect.delimiter, dialect.quotechar)) | |
| 842 | + 'and quote char {1!r}' | |
| 843 | + .format(dialect.delimiter, dialect.quotechar)) | |
| 841 | 844 | |
| 842 | 845 | # rewind file handle to start |
| 843 | 846 | file_handle.seek(0) |
| ... | ... | @@ -892,19 +895,12 @@ def process_file(filepath, field_filter_mode=None): |
| 892 | 895 | logger.debug('Process file as excel 2003 (xls)') |
| 893 | 896 | return process_xls(filepath) |
| 894 | 897 | |
| 895 | - # encrypted files also look like ole, even if office 2007+ (xml-based) | |
| 896 | - # so check for encryption, first | |
| 897 | 898 | ole = olefile.OleFileIO(filepath, path_encoding=None) |
| 898 | - oid = oleid.OleID(ole) | |
| 899 | - if oid.check_encrypted().value: | |
| 900 | - log.debug('is encrypted - raise error') | |
| 901 | - raise FileIsEncryptedError(filepath) | |
| 902 | - elif oid.check_powerpoint().value: | |
| 903 | - log.debug('is ppt - cannot have DDE') | |
| 899 | + if is_ppt(ole): | |
| 900 | + logger.debug('is ppt - cannot have DDE') | |
| 904 | 901 | return u'' |
| 905 | - else: | |
| 906 | - logger.debug('Process file as word 2003 (doc)') | |
| 907 | - return process_doc(ole) | |
| 902 | + logger.debug('Process file as word 2003 (doc)') | |
| 903 | + return process_doc(ole) | |
| 908 | 904 | |
| 909 | 905 | with open(filepath, 'rb') as file_handle: |
| 910 | 906 | if file_handle.read(4) == RTF_START: |
| ... | ... | @@ -921,22 +917,73 @@ def process_file(filepath, field_filter_mode=None): |
| 921 | 917 | if doctype == ooxml.DOCTYPE_EXCEL: |
| 922 | 918 | logger.debug('Process file as excel 2007+ (xlsx)') |
| 923 | 919 | return process_xlsx(filepath) |
| 924 | - elif doctype in (ooxml.DOCTYPE_EXCEL_XML, ooxml.DOCTYPE_EXCEL_XML2003): | |
| 920 | + if doctype in (ooxml.DOCTYPE_EXCEL_XML, ooxml.DOCTYPE_EXCEL_XML2003): | |
| 925 | 921 | logger.debug('Process file as xml from excel 2003/2007+') |
| 926 | 922 | return process_excel_xml(filepath) |
| 927 | - elif doctype in (ooxml.DOCTYPE_WORD_XML, ooxml.DOCTYPE_WORD_XML2003): | |
| 923 | + if doctype in (ooxml.DOCTYPE_WORD_XML, ooxml.DOCTYPE_WORD_XML2003): | |
| 928 | 924 | logger.debug('Process file as xml from word 2003/2007+') |
| 929 | 925 | return process_docx(filepath) |
| 930 | - elif doctype is None: | |
| 926 | + if doctype is None: | |
| 931 | 927 | logger.debug('Process file as csv') |
| 932 | 928 | return process_csv(filepath) |
| 933 | - else: # could be docx; if not: this is the old default code path | |
| 934 | - logger.debug('Process file as word 2007+ (docx)') | |
| 935 | - return process_docx(filepath, field_filter_mode) | |
| 929 | + # could be docx; if not: this is the old default code path | |
| 930 | + logger.debug('Process file as word 2007+ (docx)') | |
| 931 | + return process_docx(filepath, field_filter_mode) | |
| 936 | 932 | |
| 937 | 933 | |
| 938 | 934 | # === MAIN ================================================================= |
| 939 | 935 | |
| 936 | + | |
def process_maybe_encrypted(filepath, passwords=None, crypto_nesting=0,
                            **kwargs):
    """
    Process a file that might be encrypted.

    Calls :py:func:`process_file` and if that fails tries to decrypt and
    process the result. Based on recommendation in module doc string of
    :py:mod:`oletools.crypto`.

    Even if :py:func:`process_file` succeeds, the file is still decrypted and
    re-processed if :py:func:`crypto.is_encrypted` reports it as encrypted.
    The temporary decrypted file is removed again before returning.

    :param str filepath: path to file on disc.
    :param passwords: list of passwords (str) to try for decryption or None
    :param int crypto_nesting: How many decryption layers were already used to
                               get the given file.
    :param kwargs: same as :py:func:`process_file`
    :returns: same as :py:func:`process_file`
    :raises crypto.MaxCryptoNestingReached: if `crypto_nesting` has reached
                                            `crypto.MAX_NESTING_DEPTH`
    :raises crypto.WrongEncryptionPassword: if decryption did not yield a file
    """
    result = u''
    try:
        result = process_file(filepath, **kwargs)
        if not crypto.is_encrypted(filepath):
            return result
    except Exception:
        # only swallow the error if encryption can explain it
        if not crypto.is_encrypted(filepath):
            raise

    # we reach this point only if file is encrypted
    # check if this is an encrypted file in an encrypted file in an ...
    if crypto_nesting >= crypto.MAX_NESTING_DEPTH:
        raise crypto.MaxCryptoNestingReached(crypto_nesting, filepath)

    # work on a copy so the caller's list is never modified; add the
    # write-protect password only once (this function calls itself with the
    # already-extended list)
    passwords = [] if passwords is None else list(passwords)
    if crypto.WRITE_PROTECT_ENCRYPTION_PASSWORD not in passwords:
        passwords.append(crypto.WRITE_PROTECT_ENCRYPTION_PASSWORD)

    decrypted_file = None
    try:
        logger.debug('Trying to decrypt file')
        decrypted_file = crypto.decrypt(filepath, passwords)
        if not decrypted_file:
            # same check as in olevba: no result means no password matched
            raise crypto.WrongEncryptionPassword(filepath)
        logger.info('Analyze decrypted file')
        result = process_maybe_encrypted(decrypted_file, passwords,
                                         crypto_nesting+1, **kwargs)
    finally:     # clean up
        if decrypted_file:     # (maybe file was not yet created)
            try:
                os.unlink(decrypted_file)
            except OSError as exc:
                # best effort: a left-over temp file must not hide the result
                # or the original error
                logger.debug('Could not remove decrypted file {}: {}'
                             .format(decrypted_file, exc))
    return result
| 985 | + | |
| 986 | + | |
| 940 | 987 | def main(cmd_line_args=None): |
| 941 | 988 | """ Main function, called if this file is called as a script |
| 942 | 989 | |
| ... | ... | @@ -961,10 +1008,12 @@ def main(cmd_line_args=None): |
| 961 | 1008 | text = '' |
| 962 | 1009 | return_code = 1 |
| 963 | 1010 | try: |
| 964 | - text = process_file(args.filepath, args.field_filter_mode) | |
| 1011 | + text = process_maybe_encrypted( | |
| 1012 | + args.filepath, args.password, | |
| 1013 | + field_filter_mode=args.field_filter_mode) | |
| 965 | 1014 | return_code = 0 |
| 966 | 1015 | except Exception as exc: |
| 967 | - logger.exception(exc.message) | |
| 1016 | + logger.exception(str(exc)) | |
| 968 | 1017 | |
| 969 | 1018 | logger.print_str('DDE Links:') |
| 970 | 1019 | logger.print_str(text) | ... | ... |
oletools/oleid.py
| ... | ... | @@ -93,6 +93,7 @@ except ImportError: |
| 93 | 93 | sys.path.insert(0, PARENT_DIR) |
| 94 | 94 | del PARENT_DIR |
| 95 | 95 | from oletools.thirdparty.prettytable import prettytable |
| 96 | +from oletools import crypto | |
| 96 | 97 | |
| 97 | 98 | import olefile |
| 98 | 99 | |
| ... | ... | @@ -279,20 +280,7 @@ class OleID(object): |
| 279 | 280 | self.indicators.append(encrypted) |
| 280 | 281 | if not self.ole: |
| 281 | 282 | return None |
| 282 | - # check if bit 1 of security field = 1: | |
| 283 | - # (this field may be missing for Powerpoint2000, for example) | |
| 284 | - if self.suminfo_data is None: | |
| 285 | - self.check_properties() | |
| 286 | - if 0x13 in self.suminfo_data: | |
| 287 | - if self.suminfo_data[0x13] & 1: | |
| 288 | - encrypted.value = True | |
| 289 | - # check if this is an OpenXML encrypted file | |
| 290 | - elif self.ole.exists('EncryptionInfo'): | |
| 291 | - encrypted.value = True | |
| 292 | - # or an encrypted ppt file | |
| 293 | - if self.ole.exists('EncryptedSummary') and \ | |
| 294 | - not self.ole.exists('SummaryInformation'): | |
| 295 | - encrypted.value = True | |
| 283 | + encrypted.value = crypto.is_encrypted(self.ole) | |
| 296 | 284 | return encrypted |
| 297 | 285 | |
| 298 | 286 | def check_word(self): |
| ... | ... | @@ -316,27 +304,7 @@ class OleID(object): |
| 316 | 304 | return None, None |
| 317 | 305 | if self.ole.exists('WordDocument'): |
| 318 | 306 | word.value = True |
| 319 | - # check for Word-specific encryption flag: | |
| 320 | - stream = None | |
| 321 | - try: | |
| 322 | - stream = self.ole.openstream(["WordDocument"]) | |
| 323 | - # pass header 10 bytes | |
| 324 | - stream.read(10) | |
| 325 | - # read flag structure: | |
| 326 | - temp16 = struct.unpack("H", stream.read(2))[0] | |
| 327 | - f_encrypted = (temp16 & 0x0100) >> 8 | |
| 328 | - if f_encrypted: | |
| 329 | - # correct encrypted indicator if present or add one | |
| 330 | - encrypt_ind = self.get_indicator('encrypted') | |
| 331 | - if encrypt_ind: | |
| 332 | - encrypt_ind.value = True | |
| 333 | - else: | |
| 334 | - self.indicators.append('encrypted', True, name='Encrypted') | |
| 335 | - except Exception: | |
| 336 | - raise | |
| 337 | - finally: | |
| 338 | - if stream is not None: | |
| 339 | - stream.close() | |
| 307 | + | |
| 340 | 308 | # check for VBA macros: |
| 341 | 309 | if self.ole.exists('Macros'): |
| 342 | 310 | macros.value = True | ... | ... |
oletools/olevba.py
| ... | ... | @@ -312,8 +312,7 @@ from pyparsing import \ |
| 312 | 312 | from oletools import ppt_parser |
| 313 | 313 | from oletools import oleform |
| 314 | 314 | from oletools import rtfobj |
| 315 | -from oletools import oleid | |
| 316 | -from oletools.common.errors import FileIsEncryptedError | |
| 315 | +from oletools import crypto | |
| 317 | 316 | from oletools.common import codepages |
| 318 | 317 | |
| 319 | 318 | # monkeypatch email to fix issue #32: |
| ... | ... | @@ -2585,12 +2584,6 @@ class VBA_Parser(object): |
| 2585 | 2584 | # This looks like an OLE file |
| 2586 | 2585 | self.open_ole(_file) |
| 2587 | 2586 | |
| 2588 | - # check whether file is encrypted (need to do this before try ppt) | |
| 2589 | - log.debug('Check encryption of ole file') | |
| 2590 | - crypt_indicator = oleid.OleID(self.ole_file).check_encrypted() | |
| 2591 | - if crypt_indicator.value: | |
| 2592 | - raise FileIsEncryptedError(filename) | |
| 2593 | - | |
| 2594 | 2587 | # if this worked, try whether it is a ppt file (special ole file) |
| 2595 | 2588 | self.open_ppt() |
| 2596 | 2589 | if self.type is None and zipfile.is_zipfile(_file): |
| ... | ... | @@ -3741,6 +3734,10 @@ def parse_args(cmd_line_args=None): |
| 3741 | 3734 | help='find files recursively in subdirectories.') |
| 3742 | 3735 | parser.add_option("-z", "--zip", dest='zip_password', type='str', default=None, |
| 3743 | 3736 | help='if the file is a zip archive, open all files from it, using the provided password.') |
| 3737 | + parser.add_option("-p", "--password", type='str', action='append', | |
| 3738 | + default=[], | |
| 3739 | + help='if encrypted office files are encountered, try ' | |
| 3740 | + 'decryption with this password. May be repeated.') | |
| 3744 | 3741 | parser.add_option("-f", "--zipfname", dest='zip_fname', type='str', default='*', |
| 3745 | 3742 | help='if the file is a zip archive, file(s) to be opened within the zip. Wildcards * and ? are supported. (default:*)') |
| 3746 | 3743 | # output mode; could make this even simpler with add_option(type='choice') but that would make |
| ... | ... | @@ -3790,6 +3787,106 @@ def parse_args(cmd_line_args=None): |
| 3790 | 3787 | return options, args |
| 3791 | 3788 | |
| 3792 | 3789 | |
def process_file(filename, data, container, options, crypto_nesting=0):
    """
    Part of main function that processes a single file.

    This handles exceptions and encryption: known parsing errors are reported
    according to `options.output_mode` and turned into a return code. If the
    file is encrypted, it is decrypted with `options.password` (plus the
    write-protect password) and this function is called again on the result.
    The parser and the temporary decrypted file are always cleaned up.

    :param filename: name of the file to analyze
    :param data: file contents as provided by the caller, or None
    :param container: name of the container the file came from, or None
    :param options: parsed command line options
    :param int crypto_nesting: number of decryption layers already removed
    :returns: a single code summarizing the status of processing of this file
              (`RETURN_OK`, `RETURN_OPEN_ERROR` or `RETURN_PARSE_ERROR`)
    :raises crypto.MaxCryptoNestingReached: if `crypto_nesting` has reached
                                            `crypto.MAX_NESTING_DEPTH`
    :raises crypto.WrongEncryptionPassword: if decryption did not yield a file
    :raises Exception: any unexpected error of an unencrypted file is re-raised
    """
    vba_parser = None
    try:
        # Open the file
        vba_parser = VBA_Parser_CLI(filename, data=data, container=container,
                                    relaxed=options.relaxed)

        if options.output_mode == 'detailed':
            # fully detailed output
            vba_parser.process_file(show_decoded_strings=options.show_decoded_strings,
                         display_code=options.display_code,
                         hide_attributes=options.hide_attributes, vba_code_only=options.vba_code_only,
                         show_deobfuscated_code=options.show_deobfuscated_code,
                         deobfuscate=options.deobfuscate)
        elif options.output_mode == 'triage':
            # summarized output for triage:
            vba_parser.process_file_triage(show_decoded_strings=options.show_decoded_strings,
                                           deobfuscate=options.deobfuscate)
        elif options.output_mode == 'json':
            print_json(
                vba_parser.process_file_json(show_decoded_strings=options.show_decoded_strings,
                         display_code=options.display_code,
                         hide_attributes=options.hide_attributes, vba_code_only=options.vba_code_only,
                         show_deobfuscated_code=options.show_deobfuscated_code,
                         deobfuscate=options.deobfuscate))
        else:  # (should be impossible)
            raise ValueError('unexpected output mode: "{0}"!'.format(options.output_mode))

        # even if processing succeeds, file might still be encrypted
        log.debug('Checking for encryption')
        if not crypto.is_encrypted(filename):
            return RETURN_OK
    except Exception as exc:
        log.debug('Checking for encryption')
        if not crypto.is_encrypted(filename):
            # not encrypted, so this is a genuine error: report it
            if isinstance(exc, (SubstreamOpenError, UnexpectedDataError)):
                if options.output_mode in ('triage', 'unspecified'):
                    print('%-12s %s - Error opening substream or unexpected ' \
                          'content' % ('?', filename))
                elif options.output_mode == 'json':
                    print_json(file=filename, type='error',
                               error=type(exc).__name__, message=str(exc))
                else:
                    log.exception('Error opening substream or unexpected '
                                  'content in %s' % filename)
                return RETURN_OPEN_ERROR
            elif isinstance(exc, FileOpenError):
                if options.output_mode in ('triage', 'unspecified'):
                    print('%-12s %s - File format not supported' % ('?', filename))
                elif options.output_mode == 'json':
                    print_json(file=filename, type='error',
                               error=type(exc).__name__, message=str(exc))
                else:
                    log.exception('Failed to open %s -- probably not supported!' % filename)
                return RETURN_OPEN_ERROR
            elif isinstance(exc, ProcessingError):
                if options.output_mode in ('triage', 'unspecified'):
                    print('%-12s %s - %s' % ('!ERROR', filename, exc.orig_exc))
                elif options.output_mode == 'json':
                    print_json(file=filename, type='error',
                               error=type(exc).__name__,
                               message=str(exc.orig_exc))
                else:
                    log.exception('Error processing file %s (%s)!'
                                  % (filename, exc.orig_exc))
                return RETURN_PARSE_ERROR
            else:
                raise    # let caller deal with this
        # else: file is encrypted --> deal with this below
    finally:
        # release the parser's resources; nothing below needs it any more
        if vba_parser is not None:
            vba_parser.close()

    # we reach this point only if file is encrypted
    # check if this is an encrypted file in an encrypted file in an ...
    if crypto_nesting >= crypto.MAX_NESTING_DEPTH:
        raise crypto.MaxCryptoNestingReached(crypto_nesting, filename)

    decrypted_file = None
    try:
        log.debug('Checking encryption passwords {}'.format(options.password))
        passwords = options.password + \
            [crypto.WRITE_PROTECT_ENCRYPTION_PASSWORD, ]
        decrypted_file = crypto.decrypt(filename, passwords)
        if not decrypted_file:
            raise crypto.WrongEncryptionPassword(filename)
        log.info('Working on decrypted file')
        # NOTE(review): `data` still belongs to the encrypted input; confirm
        # that the parser prefers the decrypted file over it when data is set
        return process_file(decrypted_file, data, container or filename,
                            options, crypto_nesting+1)
    finally:     # clean up
        if decrypted_file is not None and os.path.isfile(decrypted_file):
            os.unlink(decrypted_file)
| 3888 | + | |
| 3889 | + | |
| 3793 | 3890 | def main(cmd_line_args=None): |
| 3794 | 3891 | """ |
| 3795 | 3892 | Main function, called when olevba is run from the command line |
| ... | ... | @@ -3824,35 +3921,44 @@ def main(cmd_line_args=None): |
| 3824 | 3921 | if options.output_mode == 'triage' and options.show_deobfuscated_code: |
| 3825 | 3922 | log.info('ignoring option --reveal in triage output mode') |
| 3826 | 3923 | |
| 3827 | - # Column headers (do not know how many files there will be yet, so if no output_mode | |
| 3828 | - # was specified, we will print triage for first file --> need these headers) | |
| 3829 | - if options.output_mode in ('triage', 'unspecified'): | |
| 3924 | + # gather info on all files that must be processed | |
| 3925 | + # ignore directory names stored in zip files: | |
| 3926 | + all_input_info = tuple((container, filename, data) for | |
| 3927 | + container, filename, data in xglob.iter_files( | |
| 3928 | + args, recursive=options.recursive, | |
| 3929 | + zip_password=options.zip_password, | |
| 3930 | + zip_fname=options.zip_fname) | |
| 3931 | + if not (container and filename.endswith('/'))) | |
| 3932 | + | |
| 3933 | + # specify output mode if options -t, -d and -j were not specified | |
| 3934 | + if options.output_mode == 'unspecified': | |
| 3935 | + if len(all_input_info) == 1: | |
| 3936 | + options.output_mode = 'detailed' | |
| 3937 | + else: | |
| 3938 | + options.output_mode = 'triage' | |
| 3939 | + | |
| 3940 | + # Column headers for triage mode | |
| 3941 | + if options.output_mode == 'triage': | |
| 3830 | 3942 | print('%-12s %-65s' % ('Flags', 'Filename')) |
| 3831 | 3943 | print('%-12s %-65s' % ('-' * 11, '-' * 65)) |
| 3832 | 3944 | |
| 3833 | 3945 | previous_container = None |
| 3834 | 3946 | count = 0 |
| 3835 | 3947 | container = filename = data = None |
| 3836 | - vba_parser = None | |
| 3837 | 3948 | return_code = RETURN_OK |
| 3838 | 3949 | try: |
| 3839 | - for container, filename, data in xglob.iter_files(args, recursive=options.recursive, | |
| 3840 | - zip_password=options.zip_password, zip_fname=options.zip_fname): | |
| 3841 | - # ignore directory names stored in zip files: | |
| 3842 | - if container and filename.endswith('/'): | |
| 3843 | - continue | |
| 3844 | - | |
| 3950 | + for container, filename, data in all_input_info: | |
| 3845 | 3951 | # handle errors from xglob |
| 3846 | 3952 | if isinstance(data, Exception): |
| 3847 | 3953 | if isinstance(data, PathNotFoundException): |
| 3848 | - if options.output_mode in ('triage', 'unspecified'): | |
| 3954 | + if options.output_mode == 'triage': | |
| 3849 | 3955 | print('%-12s %s - File not found' % ('?', filename)) |
| 3850 | 3956 | elif options.output_mode != 'json': |
| 3851 | 3957 | log.error('Given path %r does not exist!' % filename) |
| 3852 | 3958 | return_code = RETURN_FILE_NOT_FOUND if return_code == 0 \ |
| 3853 | 3959 | else RETURN_SEVERAL_ERRS |
| 3854 | 3960 | else: |
| 3855 | - if options.output_mode in ('triage', 'unspecified'): | |
| 3961 | + if options.output_mode == 'triage': | |
| 3856 | 3962 | print('%-12s %s - Failed to read from zip file %s' % ('?', filename, container)) |
| 3857 | 3963 | elif options.output_mode != 'json': |
| 3858 | 3964 | log.error('Exception opening/reading %r from zip file %r: %s' |
| ... | ... | @@ -3864,107 +3970,42 @@ def main(cmd_line_args=None): |
| 3864 | 3970 | error=type(data).__name__, message=str(data)) |
| 3865 | 3971 | continue |
| 3866 | 3972 | |
| 3867 | - try: | |
| 3868 | - # close the previous file if analyzing several: | |
| 3869 | - # (this must be done here to avoid closing the file if there is only 1, | |
| 3870 | - # to fix issue #219) | |
| 3871 | - if vba_parser is not None: | |
| 3872 | - vba_parser.close() | |
| 3873 | - # Open the file | |
| 3874 | - vba_parser = VBA_Parser_CLI(filename, data=data, container=container, | |
| 3875 | - relaxed=options.relaxed) | |
| 3876 | - | |
| 3877 | - if options.output_mode == 'detailed': | |
| 3878 | - # fully detailed output | |
| 3879 | - vba_parser.process_file(show_decoded_strings=options.show_decoded_strings, | |
| 3880 | - display_code=options.display_code, | |
| 3881 | - hide_attributes=options.hide_attributes, vba_code_only=options.vba_code_only, | |
| 3882 | - show_deobfuscated_code=options.show_deobfuscated_code, | |
| 3883 | - deobfuscate=options.deobfuscate) | |
| 3884 | - elif options.output_mode in ('triage', 'unspecified'): | |
| 3885 | - # print container name when it changes: | |
| 3886 | - if container != previous_container: | |
| 3887 | - if container is not None: | |
| 3888 | - print('\nFiles in %s:' % container) | |
| 3889 | - previous_container = container | |
| 3890 | - # summarized output for triage: | |
| 3891 | - vba_parser.process_file_triage(show_decoded_strings=options.show_decoded_strings, | |
| 3892 | - deobfuscate=options.deobfuscate) | |
| 3893 | - elif options.output_mode == 'json': | |
| 3894 | - print_json( | |
| 3895 | - vba_parser.process_file_json(show_decoded_strings=options.show_decoded_strings, | |
| 3896 | - display_code=options.display_code, | |
| 3897 | - hide_attributes=options.hide_attributes, vba_code_only=options.vba_code_only, | |
| 3898 | - show_deobfuscated_code=options.show_deobfuscated_code, | |
| 3899 | - deobfuscate=options.deobfuscate)) | |
| 3900 | - else: # (should be impossible) | |
| 3901 | - raise ValueError('unexpected output mode: "{0}"!'.format(options.output_mode)) | |
| 3902 | - count += 1 | |
| 3903 | - | |
| 3904 | - except (SubstreamOpenError, UnexpectedDataError) as exc: | |
| 3905 | - if options.output_mode in ('triage', 'unspecified'): | |
| 3906 | - print('%-12s %s - Error opening substream or uenxpected ' \ | |
| 3907 | - 'content' % ('?', filename)) | |
| 3908 | - elif options.output_mode == 'json': | |
| 3909 | - print_json(file=filename, type='error', | |
| 3910 | - error=type(exc).__name__, message=str(exc)) | |
| 3911 | - else: | |
| 3912 | - log.exception('Error opening substream or unexpected ' | |
| 3913 | - 'content in %s' % filename) | |
| 3914 | - return_code = RETURN_OPEN_ERROR if return_code == 0 \ | |
| 3915 | - else RETURN_SEVERAL_ERRS | |
| 3916 | - except FileOpenError as exc: | |
| 3917 | - if options.output_mode in ('triage', 'unspecified'): | |
| 3918 | - print('%-12s %s - File format not supported' % ('?', filename)) | |
| 3919 | - elif options.output_mode == 'json': | |
| 3920 | - print_json(file=filename, type='error', | |
| 3921 | - error=type(exc).__name__, message=str(exc)) | |
| 3922 | - else: | |
| 3923 | - log.exception('Failed to open %s -- probably not supported!' % filename) | |
| 3924 | - return_code = RETURN_OPEN_ERROR if return_code == 0 \ | |
| 3925 | - else RETURN_SEVERAL_ERRS | |
| 3926 | - except ProcessingError as exc: | |
| 3927 | - if options.output_mode in ('triage', 'unspecified'): | |
| 3928 | - print('%-12s %s - %s' % ('!ERROR', filename, exc.orig_exc)) | |
| 3929 | - elif options.output_mode == 'json': | |
| 3930 | - print_json(file=filename, type='error', | |
| 3931 | - error=type(exc).__name__, | |
| 3932 | - message=str(exc.orig_exc)) | |
| 3933 | - else: | |
| 3934 | - log.exception('Error processing file %s (%s)!' | |
| 3935 | - % (filename, exc.orig_exc)) | |
| 3936 | - return_code = RETURN_PARSE_ERROR if return_code == 0 \ | |
| 3937 | - else RETURN_SEVERAL_ERRS | |
| 3938 | - except FileIsEncryptedError as exc: | |
| 3939 | - if options.output_mode in ('triage', 'unspecified'): | |
| 3940 | - print('%-12s %s - File is encrypted' % ('!ERROR', filename)) | |
| 3941 | - elif options.output_mode == 'json': | |
| 3942 | - print_json(file=filename, type='error', | |
| 3943 | - error=type(exc).__name__, message=str(exc)) | |
| 3944 | - else: | |
| 3945 | - log.exception('File %s is encrypted!' % (filename)) | |
| 3946 | - return_code = RETURN_ENCRYPTED if return_code == 0 \ | |
| 3947 | - else RETURN_SEVERAL_ERRS | |
| 3948 | - # Here we do not close the vba_parser, because process_file may need it below. | |
| 3973 | + if options.output_mode == 'triage': | |
| 3974 | + # print container name when it changes: | |
| 3975 | + if container != previous_container: | |
| 3976 | + if container is not None: | |
| 3977 | + print('\nFiles in %s:' % container) | |
| 3978 | + previous_container = container | |
| 3979 | + | |
| 3980 | + # process the file, handling errors and encryption | |
| 3981 | + curr_return_code = process_file(filename, data, container, options) | |
| 3982 | + count += 1 | |
| 3983 | + | |
| 3984 | + # adjust overall return code | |
| 3985 | + if curr_return_code == RETURN_OK: | |
| 3986 | + continue # do not modify overall return code | |
| 3987 | + if return_code == RETURN_OK: | |
| 3988 | + return_code = curr_return_code # first error return code | |
| 3989 | + else: | |
| 3990 | + return_code = RETURN_SEVERAL_ERRS # several errors | |
| 3949 | 3991 | |
| 3950 | 3992 | if options.output_mode == 'triage': |
| 3951 | 3993 | print('\n(Flags: OpX=OpenXML, XML=Word2003XML, FlX=FlatOPC XML, MHT=MHTML, TXT=Text, M=Macros, ' \ |
| 3952 | 3994 | 'A=Auto-executable, S=Suspicious keywords, I=IOCs, H=Hex strings, ' \ |
| 3953 | 3995 | 'B=Base64 strings, D=Dridex strings, V=VBA strings, ?=Unknown)\n') |
| 3954 | 3996 | |
| 3955 | - if count == 1 and options.output_mode == 'unspecified': | |
| 3956 | - # if options -t, -d and -j were not specified and it's a single file, print details: | |
| 3957 | - vba_parser.process_file(show_decoded_strings=options.show_decoded_strings, | |
| 3958 | - display_code=options.display_code, | |
| 3959 | - hide_attributes=options.hide_attributes, vba_code_only=options.vba_code_only, | |
| 3960 | - show_deobfuscated_code=options.show_deobfuscated_code, | |
| 3961 | - deobfuscate=options.deobfuscate) | |
| 3962 | - | |
| 3963 | 3997 | if options.output_mode == 'json': |
| 3964 | 3998 | # print last json entry (a last one without a comma) and closing ] |
| 3965 | 3999 | print_json(type='MetaInformation', return_code=return_code, |
| 3966 | 4000 | n_processed=count, _json_is_last=True) |
| 3967 | 4001 | |
| 4002 | + except crypto.CryptoErrorBase as exc: | |
| 4003 | + log.exception('Problems with encryption in main: {}'.format(exc), | |
| 4004 | + exc_info=True) | |
| 4005 | + if return_code == RETURN_OK: | |
| 4006 | + return_code = RETURN_ENCRYPTED | |
| 4007 | + else: | |
| 4008 | + return_code = RETURN_SEVERAL_ERRS | |
| 3968 | 4009 | except Exception as exc: |
| 3969 | 4010 | # some unexpected error, maybe some of the types caught in except clauses |
| 3970 | 4011 | # above were not sufficient. This is very bad, so log complete trace at exception level | ... | ... |
oletools/ppt_record_parser.py
| ... | ... | @@ -63,7 +63,7 @@ except ImportError: |
| 63 | 63 | sys.path.insert(0, PARENT_DIR) |
| 64 | 64 | del PARENT_DIR |
| 65 | 65 | from oletools import record_base |
| 66 | -from oletools.common.errors import FileIsEncryptedError | |
| 66 | +from oletools.common.errors import CryptoErrorBase | |
| 67 | 67 | |
| 68 | 68 | |
| 69 | 69 | # types of relevant records (there are much more than listed here) |
| ... | ... | @@ -149,6 +149,10 @@ def is_ppt(filename): |
| 149 | 149 | Param filename can be anything that OleFileIO constructor accepts: name of |
| 150 | 150 | file or file data or data stream. |
| 151 | 151 | |
| 152 | + Will neither try to decrypt the file nor try to determine whether it is | |
| 153 | + encrypted. If the file is encrypted, this will either raise an error or | |
| 154 | + just return `False`. | |
| 155 | + | |
| 152 | 156 | see also: oleid.OleID.check_powerpoint |
| 153 | 157 | """ |
| 154 | 158 | have_current_user = False |
| ... | ... | @@ -181,11 +185,8 @@ def is_ppt(filename): |
| 181 | 185 | return True |
| 182 | 186 | else: # ignore other streams/storages since they are optional |
| 183 | 187 | continue |
| 184 | - except FileIsEncryptedError: | |
| 185 | - assert ppt_file is not None, \ | |
| 186 | - 'Encryption error should not be raised from just opening OLE file.' | |
| 187 | - # just rely on stream names, copied from oleid | |
| 188 | - return ppt_file.exists('PowerPoint Document') | |
| 188 | + except CryptoErrorBase: | |
| 189 | + raise | |
| 189 | 190 | except Exception: |
| 190 | 191 | pass |
| 191 | 192 | return False | ... | ... |
oletools/record_base.py
| ... | ... | @@ -74,7 +74,6 @@ PARENT_DIR = os.path.normpath(os.path.dirname(os.path.dirname( |
| 74 | 74 | if PARENT_DIR not in sys.path: |
| 75 | 75 | sys.path.insert(0, PARENT_DIR) |
| 76 | 76 | del PARENT_DIR |
| 77 | -from oletools.common.errors import FileIsEncryptedError | |
| 78 | 77 | from oletools import oleid |
| 79 | 78 | |
| 80 | 79 | |
| ... | ... | @@ -127,10 +126,9 @@ class OleRecordFile(olefile.OleFileIO): |
| 127 | 126 | """ |
| 128 | 127 | |
| 129 | 128 | def open(self, filename, *args, **kwargs): |
| 130 | - """Call OleFileIO.open, raise error if is encrypted.""" | |
| 129 | + """Call OleFileIO.open.""" | |
| 131 | 130 | #super(OleRecordFile, self).open(filename, *args, **kwargs) |
| 132 | 131 | OleFileIO.open(self, filename, *args, **kwargs) |
| 133 | - self.is_encrypted = oleid.OleID(self).check_encrypted().value | |
| 134 | 132 | |
| 135 | 133 | @classmethod |
| 136 | 134 | def stream_class_for_name(cls, stream_name): |
| ... | ... | @@ -163,8 +161,7 @@ class OleRecordFile(olefile.OleFileIO): |
| 163 | 161 | stream = clz(self._open(direntry.isectStart, direntry.size), |
| 164 | 162 | direntry.size, |
| 165 | 163 | None if is_orphan else direntry.name, |
| 166 | - direntry.entry_type, | |
| 167 | - self.is_encrypted) | |
| 164 | + direntry.entry_type) | |
| 168 | 165 | yield stream |
| 169 | 166 | stream.close() |
| 170 | 167 | |
| ... | ... | @@ -177,14 +174,13 @@ class OleRecordStream(object): |
| 177 | 174 | abstract base class |
| 178 | 175 | """ |
| 179 | 176 | |
| 180 | - def __init__(self, stream, size, name, stream_type, is_encrypted=False): | |
| 177 | + def __init__(self, stream, size, name, stream_type): | |
| 181 | 178 | self.stream = stream |
| 182 | 179 | self.size = size |
| 183 | 180 | self.name = name |
| 184 | 181 | if stream_type not in ENTRY_TYPE2STR: |
| 185 | 182 | raise ValueError('Unknown stream type: {0}'.format(stream_type)) |
| 186 | 183 | self.stream_type = stream_type |
| 187 | - self.is_encrypted = is_encrypted | |
| 188 | 184 | |
| 189 | 185 | def read_record_head(self): |
| 190 | 186 | """ read first few bytes of record to determine size and type |
| ... | ... | @@ -213,9 +209,6 @@ class OleRecordStream(object): |
| 213 | 209 | |
| 214 | 210 | Stream must be positioned at start of records (e.g. start of stream). |
| 215 | 211 | """ |
| 216 | - if self.is_encrypted: | |
| 217 | - raise FileIsEncryptedError() | |
| 218 | - | |
| 219 | 212 | while True: |
| 220 | 213 | # unpacking as in olevba._extract_vba |
| 221 | 214 | pos = self.stream.tell() | ... | ... |
oletools/xls_parser.py
| ... | ... | @@ -101,7 +101,7 @@ def read_unicode(data, start_idx, n_chars): |
| 101 | 101 | """ read a unicode string from a XLUnicodeStringNoCch structure """ |
| 102 | 102 | # first bit 0x0 --> only low-bytes are saved, all high bytes are 0 |
| 103 | 103 | # first bit 0x1 --> 2 bytes per character |
| 104 | - low_bytes_only = (ord(data[start_idx]) == 0) | |
| 104 | + low_bytes_only = (ord(data[start_idx:start_idx+1]) == 0) | |
| 105 | 105 | if low_bytes_only: |
| 106 | 106 | end_idx = start_idx + 1 + n_chars |
| 107 | 107 | return data[start_idx+1:end_idx].decode('ascii'), end_idx |
| ... | ... | @@ -349,6 +349,7 @@ class XlsRecordSupBook(XlsRecord): |
| 349 | 349 | LINK_TYPE_EXTERNAL = 'external workbook' |
| 350 | 350 | |
| 351 | 351 | def finish_constructing(self, _): |
| 352 | + """Finish constructing this record; called at end of constructor.""" | |
| 352 | 353 | # set defaults |
| 353 | 354 | self.ctab = None |
| 354 | 355 | self.cch = None | ... | ... |
setup.py
| ... | ... | @@ -28,6 +28,7 @@ to install this package. |
| 28 | 28 | # 2018-09-15 PL: - easygui is now a dependency |
| 29 | 29 | # 2018-09-22 PL: - colorclass is now a dependency |
| 30 | 30 | # 2018-10-27 PL: - fixed issue #359 (bug when importing log_helper) |
| 31 | +# 2019-02-26 CH: - add optional dependency msoffcrypto for decryption | |
| 31 | 32 | |
| 32 | 33 | #--- TODO --------------------------------------------------------------------- |
| 33 | 34 | |
| ... | ... | @@ -317,6 +318,10 @@ def main(): |
| 317 | 318 | "easygui", |
| 318 | 319 | 'colorclass', |
| 319 | 320 | ], |
| 321 | + extras_require = { | |
| 322 | + # msoffcrypto-tool by nolze can be used to decrypt some office files | |
| 323 | + 'decrypt': ['msoffcrypto-tool'] | |
| 324 | + } | |
| 320 | 325 | ) |
| 321 | 326 | |
| 322 | 327 | ... | ... |
tests/common/log_helper/test_log_helper.py
| ... | ... | @@ -13,9 +13,11 @@ from tests.common.log_helper import log_helper_test_main |
| 13 | 13 | from tests.common.log_helper import log_helper_test_imported |
| 14 | 14 | from os.path import dirname, join, relpath, abspath |
| 15 | 15 | |
| 16 | +from tests.test_utils import PROJECT_ROOT | |
| 17 | + | |
| 16 | 18 | # this is the common base of "tests" and "oletools" dirs |
| 17 | -ROOT_DIRECTORY = abspath(join(__file__, '..', '..', '..', '..')) | |
| 18 | -TEST_FILE = relpath(join(dirname(__file__), 'log_helper_test_main.py'), ROOT_DIRECTORY) | |
| 19 | +TEST_FILE = relpath(join(dirname(abspath(__file__)), 'log_helper_test_main.py'), | |
| 20 | + PROJECT_ROOT) | |
| 19 | 21 | PYTHON_EXECUTABLE = sys.executable |
| 20 | 22 | |
| 21 | 23 | MAIN_LOG_MESSAGES = [ |
| ... | ... | @@ -90,9 +92,9 @@ class TestLogHelper(unittest.TestCase): |
| 90 | 92 | child = subprocess.Popen( |
| 91 | 93 | [PYTHON_EXECUTABLE, TEST_FILE] + args, |
| 92 | 94 | shell=False, |
| 93 | - env={'PYTHONPATH': ROOT_DIRECTORY}, | |
| 95 | + env={'PYTHONPATH': PROJECT_ROOT}, | |
| 94 | 96 | universal_newlines=True, |
| 95 | - cwd=ROOT_DIRECTORY, | |
| 97 | + cwd=PROJECT_ROOT, | |
| 96 | 98 | stdin=None, |
| 97 | 99 | stdout=subprocess.PIPE, |
| 98 | 100 | stderr=subprocess.PIPE | ... | ... |
tests/msodde/test_basic.py
| ... | ... | @@ -123,7 +123,7 @@ class TestDdeLinks(unittest.TestCase): |
| 123 | 123 | |
| 124 | 124 | def test_excel(self): |
| 125 | 125 | """ check that dde links are found in excel 2007+ files """ |
| 126 | - expect = ['DDE-Link cmd /c calc.exe', ] | |
| 126 | + expect = ['cmd /c calc.exe', ] | |
| 127 | 127 | for extn in 'xlsx', 'xlsm', 'xlsb': |
| 128 | 128 | output = msodde.process_file( |
| 129 | 129 | join(BASE_DIR, 'msodde', 'dde-test.' + extn), msodde.FIELD_FILTER_BLACKLIST) | ... | ... |
tests/msodde/test_crypto.py
0 → 100644
| 1 | +"""Check decryption of files from msodde works.""" | |
| 2 | + | |
| 3 | +import sys | |
| 4 | +import unittest | |
| 5 | +from os.path import join as pjoin | |
| 6 | + | |
| 7 | +from tests.test_utils import DATA_BASE_DIR | |
| 8 | + | |
| 9 | +from oletools import crypto | |
| 10 | +from oletools import msodde | |
| 11 | + | |
| 12 | + | |
| 13 | +@unittest.skipIf(not crypto.check_msoffcrypto(), | |
| 14 | + 'Module msoffcrypto not installed for python{}.{}' | |
| 15 | + .format(sys.version_info.major, sys.version_info.minor)) | |
| 16 | +class MsoddeCryptoTest(unittest.TestCase): | |
| 17 | + """Test integration of decryption in msodde.""" | |
| 18 | + def test_standard_password(self): | |
| 19 | + """Check dde-link is found in xls, xlsx, xlsm and xlsb sample files.""" | |
| 20 | + for suffix in 'xls', 'xlsx', 'xlsm', 'xlsb': | |
| 21 | + example_file = pjoin(DATA_BASE_DIR, 'encrypted', | |
| 22 | + 'dde-test-encrypt-standardpassword.' + suffix) | |
| 23 | + link_text = msodde.process_maybe_encrypted(example_file) | |
| 24 | + self.assertEqual(link_text, 'cmd /c calc.exe', | |
| 25 | + msg='Unexpected output {!r} for {}' | |
| 26 | + .format(link_text, suffix)) | |
| 27 | + | |
| 28 | + | |
| 29 | +if __name__ == '__main__': | |
| 30 | + unittest.main() | ... | ... |
tests/olevba/test_basic.py
| ... | ... | @@ -28,7 +28,15 @@ class TestOlevbaBasic(unittest.TestCase): |
| 28 | 28 | CRYPT_DIR = join(DATA_BASE_DIR, 'encrypted') |
| 29 | 29 | CRYPT_RETURN_CODE = 9 |
| 30 | 30 | ADD_ARGS = [], ['-d', ], ['-a', ], ['-j', ], ['-t', ] |
| 31 | + EXCEPTIONS = ['autostart-encrypt-standardpassword.xlsm', # These ... | |
| 32 | + 'autostart-encrypt-standardpassword.xlsb', # files ... | |
| 33 | + 'dde-test-encrypt-standardpassword.xls', # are ... | |
| 34 | + 'dde-test-encrypt-standardpassword.xlsx', # decrypted | |
| 35 | + 'dde-test-encrypt-standardpassword.xlsm', # per ... | |
| 36 | + 'dde-test-encrypt-standardpassword.xlsb'] # default. | |
| 31 | 37 | for filename in os.listdir(CRYPT_DIR): |
| 38 | + if filename in EXCEPTIONS: | |
| 39 | + continue | |
| 32 | 40 | full_name = join(CRYPT_DIR, filename) |
| 33 | 41 | for args in ADD_ARGS: |
| 34 | 42 | try: | ... | ... |
tests/olevba/test_crypto.py
0 → 100644
| 1 | +"""Check decryption of files from olevba works.""" | |
| 2 | + | |
| 3 | +import sys | |
| 4 | +import unittest | |
| 5 | +import os | |
| 6 | +from os.path import join as pjoin | |
| 7 | +from subprocess import check_output, CalledProcessError | |
| 8 | +import json | |
| 9 | +from collections import OrderedDict | |
| 10 | + | |
| 11 | +from tests.test_utils import DATA_BASE_DIR, SOURCE_BASE_DIR | |
| 12 | + | |
| 13 | +from oletools import crypto | |
| 14 | + | |
| 15 | + | |
| 16 | +@unittest.skipIf(not crypto.check_msoffcrypto(), | |
| 17 | + 'Module msoffcrypto not installed for python{}.{}' | |
| 18 | + .format(sys.version_info.major, sys.version_info.minor)) | |
| 19 | +class OlevbaCryptoWriteProtectTest(unittest.TestCase): | |
| 20 | + """ | |
| 21 | + Test documents that are 'write-protected' through encryption. | |
| 22 | + | |
| 23 | + Excel has a way to 'write-protect' documents by encrypting them with a | |
| 24 | + hard-coded standard password. When looking at the file-structure you see | |
| 25 | + an OLE-file with streams `EncryptedPackage`, `StrongEncryptionSpace`, and | |
| 26 | + `EncryptionInfo`. Contained in the first is the actual file. When opening | |
| 27 | + such a file in excel, it is decrypted without the user noticing. | |
| 28 | + | |
| 29 | + Olevba should detect such encryption, try to decrypt with the standard | |
| 30 | + password and look for VBA code in the decrypted file. | |
| 31 | + | |
| 32 | + All these tests are skipped if the module `msoffcrypto-tool` is not | |
| 33 | + installed. | |
| 34 | + """ | |
| 35 | + def test_autostart(self): | |
| 36 | + """Check that autostart macro is found in xls[mb] sample files.""" | |
| 37 | + # create a PYTHONPATH environment var to prefer our olevba | |
| 38 | + env = os.environ | |
| 39 | + try: | |
| 40 | + env['PYTHONPATH'] = SOURCE_BASE_DIR + os.pathsep + \ | |
| 41 | + os.environ['PYTHONPATH'] | |
| 42 | + except KeyError: | |
| 43 | + env['PYTHONPATH'] = SOURCE_BASE_DIR | |
| 44 | + | |
| 45 | + for suffix in 'xlsm', 'xlsb': | |
| 46 | + example_file = pjoin( | |
| 47 | + DATA_BASE_DIR, 'encrypted', | |
| 48 | + 'autostart-encrypt-standardpassword.' + suffix) | |
| 49 | + try: | |
| 50 | + output = check_output([sys.executable, '-m', 'olevba', '-j', | |
| 51 | + example_file], | |
| 52 | + universal_newlines=True, env=env) | |
| 53 | + except CalledProcessError as err: | |
| 54 | + print(err.output) | |
| 55 | + raise | |
| 56 | + data = json.loads(output, object_pairs_hook=OrderedDict) | |
| 57 | + # debug: json.dump(data, sys.stdout, indent=4) | |
| 58 | + self.assertEqual(len(data), 4) | |
| 59 | + self.assertIn('script_name', data[0]) | |
| 60 | + self.assertIn('version', data[0]) | |
| 61 | + self.assertEqual(data[0]['type'], 'MetaInformation') | |
| 62 | + self.assertIn('return_code', data[-1]) | |
| 63 | + self.assertEqual(data[-1]['type'], 'MetaInformation') | |
| 64 | + self.assertEqual(data[1]['container'], None) | |
| 65 | + self.assertEqual(data[1]['file'], example_file) | |
| 66 | + self.assertEqual(data[1]['analysis'], None) | |
| 67 | + self.assertEqual(data[1]['macros'], []) | |
| 68 | + self.assertEqual(data[1]['type'], 'OLE') | |
| 69 | + self.assertEqual(data[2]['container'], example_file) | |
| 70 | + self.assertNotEqual(data[2]['file'], example_file) | |
| 71 | + self.assertEqual(data[2]['type'], "OpenXML") | |
| 72 | + analysis = data[2]['analysis'] | |
| 73 | + self.assertEqual(analysis[0]['type'], 'AutoExec') | |
| 74 | + self.assertEqual(analysis[0]['keyword'], 'Auto_Open') | |
| 75 | + macros = data[2]['macros'] | |
| 76 | + self.assertEqual(macros[0]['vba_filename'], 'Modul1.bas') | |
| 77 | + self.assertIn('Sub Auto_Open()', macros[0]['code']) | |
| 78 | + | |
| 79 | + | |
| 80 | +if __name__ == '__main__': | |
| 81 | + unittest.main() | ... | ... |
tests/ppt_parser/test_basic.py
| ... | ... | @@ -16,7 +16,7 @@ class TestBasic(unittest.TestCase): |
| 16 | 16 | |
| 17 | 17 | def test_is_ppt(self): |
| 18 | 18 | """ test ppt_record_parser.is_ppt(filename) """ |
| 19 | - exceptions = [] | |
| 19 | + exceptions = ['encrypted.ppt', ] # actually is ppt but embedded | |
| 20 | 20 | for base_dir, _, files in os.walk(DATA_BASE_DIR): |
| 21 | 21 | for filename in files: |
| 22 | 22 | if filename in exceptions: | ... | ... |
tests/test-data/encrypted/autostart-encrypt-standardpassword.xlsb
0 → 100755
No preview for this file type
tests/test-data/encrypted/autostart-encrypt-standardpassword.xlsm
0 → 100755
No preview for this file type
tests/test-data/encrypted/dde-test-encrypt-standardpassword.xls
0 → 100755
No preview for this file type
tests/test-data/encrypted/dde-test-encrypt-standardpassword.xlsb
0 → 100755
No preview for this file type
tests/test-data/encrypted/dde-test-encrypt-standardpassword.xlsm
0 → 100755
No preview for this file type
tests/test-data/encrypted/dde-test-encrypt-standardpassword.xlsx
0 → 100755
No preview for this file type
tests/test_utils/__init__.py
| 1 | -from os.path import dirname, join | |
| 1 | +from os.path import dirname, join, abspath | |
| 2 | + | |
| 3 | +# Base dir of project, contains subdirs "tests" and "oletools" and README.md | |
| 4 | +PROJECT_ROOT = dirname(dirname(dirname(abspath(__file__)))) | |
| 2 | 5 | |
| 3 | 6 | # Directory with test data, independent of current working directory |
| 4 | -DATA_BASE_DIR = join(dirname(dirname(__file__)), 'test-data') | |
| 7 | +DATA_BASE_DIR = join(PROJECT_ROOT, 'tests', 'test-data') | |
| 8 | + | |
| 9 | +# Directory with source code | |
| 10 | +SOURCE_BASE_DIR = join(PROJECT_ROOT, 'oletools') | ... | ... |