Commit 88c2b0a0217e0f5844d6ad078fa1da265dcafbcb

Authored by Philippe Lagadec
Committed by GitHub
2 parents c747acac 1a2c90ee

Merge pull request #403 from christian-intra2net/crypto-write-protect

Integrate decrypt from msoffcrypto-tools
.travis.yml
... ... @@ -17,5 +17,8 @@ matrix:
17 17 - python: pypy
18 18 - python: pypy3
19 19  
  20 +install:
  21 + - pip install msoffcrypto-tool
  22 +
20 23 script:
21 24 - python setup.py test
... ...
oletools/common/errors.py
... ... @@ -4,10 +4,42 @@ Errors used in several tools to avoid duplication
4 4 .. codeauthor:: Intra2net AG <info@intra2net.com>
5 5 """
6 6  
7   -class FileIsEncryptedError(ValueError):
class CryptoErrorBase(ValueError):
    """Common base class of the crypto-related exceptions in this module.

    Derives from :py:class:`ValueError`, so code that handles `ValueError`
    also handles every subclass of this class.
    """
  10 +
  11 +
class CryptoLibNotImported(CryptoErrorBase, ImportError):
    """Exception thrown if msoffcrypto is needed but could not be imported.

    Derives from both :py:class:`CryptoErrorBase` and :py:class:`ImportError`,
    so it is caught by handlers for either of them.
    """

    def __init__(self):
        # Name the package the way it is installed
        # (`pip install msoffcrypto-tool`), so users can act on the message.
        super(CryptoLibNotImported, self).__init__(
            'msoffcrypto-tool could not be imported')
  18 +
  19 +
class UnsupportedEncryptionError(CryptoErrorBase):
    """Raised when a file is encrypted in a way that cannot be dealt with."""

    def __init__(self, filename=None):
        """
        Build the error message, optionally naming the offending file.

        :param filename: name of the encrypted file or None to omit it
        """
        # the optional name is followed by one blank inside the message
        name_part = '' if filename is None else filename + ' '
        message = 'Office file {}is encrypted, not yet supported' \
                  .format(name_part)
        super(UnsupportedEncryptionError, self).__init__(message)
  26 +
  27 +
class WrongEncryptionPassword(CryptoErrorBase):
    """Raised when decryption is supported but no given password fits."""

    def __init__(self, filename=None):
        """
        Build the error message, optionally naming the offending file.

        :param filename: name of the encrypted file or None to omit it
        """
        # the optional name is preceded by one blank inside the message
        name_part = '' if filename is None else ' ' + filename
        message = 'Given passwords could not decrypt office file{}' \
                  .format(name_part)
        super(WrongEncryptionPassword, self).__init__(message)
  34 +
  35 +
class MaxCryptoNestingReached(CryptoErrorBase):
    """
    Raised when decryption is layered too deeply.

    (...or when the decrypt code creates an infinite loop)
    """

    def __init__(self, n_layers, filename=None):
        """
        Build the error message from layer count and optional file name.

        :param n_layers: number of encryption layers reported in the message
        :param filename: name of the encrypted file or None to omit it
        """
        # the optional name is preceded by one blank inside the message
        name_part = '' if filename is None else ' ' + filename
        message = 'Encountered more than {} layers of encryption for ' \
                  'office file{}'.format(n_layers, name_part)
        super(MaxCryptoNestingReached, self).__init__(message)
... ...
oletools/crypto.py 0 → 100644
  1 +#!/usr/bin/env python
  2 +"""
  3 +crypto.py
  4 +
  5 +Module to be used by other scripts and modules in oletools, that provides
  6 +information on encryption in OLE files.
  7 +
  8 +Uses :py:mod:`msoffcrypto-tool` to decrypt if it is available. Otherwise
  9 +decryption will fail with an ImportError.
  10 +
  11 +Encryption/Write-Protection can be realized in many different ways. They range
  12 +from setting a single flag in an otherwise unprotected file to embedding a
  13 +regular file (e.g. xlsx) in an EncryptedStream inside an OLE file. That means
  14 +that (1) lots of bad things are accessible even if no encryption password
  15 +is known, and (2) even basic attributes like the file type can change by
  16 +decryption. Therefore I suggest the following general routine to deal with
  17 +potentially encrypted files::
  18 +
  19 + def script_main_function(input_file, passwords, crypto_nesting=0, args):
  20 + '''Wrapper around main function to deal with encrypted files.'''
  21 + initial_stuff(input_file, args)
  22 + result = None
  23 + try:
  24 + result = do_your_thing_assuming_no_encryption(input_file)
  25 + if not crypto.is_encrypted(input_file):
  26 + return result
  27 + except Exception:
  28 + if not crypto.is_encrypted(input_file):
  29 + raise
  30 + # we reach this point only if file is encrypted
  31 + # check if this is an encrypted file in an encrypted file in an ...
  32 + if crypto_nesting >= crypto.MAX_NESTING_DEPTH:
  33 + raise crypto.MaxCryptoNestingReached(crypto_nesting, input_file)
  34 + decrypted_file = None
  35 + try:
  36 + decrypted_file = crypto.decrypt(input_file, passwords)
  37 + # might still be encrypted, so call this again recursively
  38 + result = script_main_function(decrypted_file, passwords,
  39 + crypto_nesting+1, args)
  40 + except Exception:
  41 + raise
  42 + finally: # clean up
  43 + try: # (maybe file was not yet created)
  44 + os.unlink(decrypted_file)
  45 + except Exception:
  46 + pass
  47 +
  48 +(Realized e.g. in :py:mod:`oletools.msodde`).
  49 +That means that caller code needs another wrapper around its main function. I
  50 +did try it another way first (a transparent on-demand unencrypt) but for the
  51 +above reasons I believe this is the better way. Also, non-top-level-code can
  52 +just assume that it works on unencrypted data and fail with an exception if
  53 +encrypted data makes its work impossible. No need to check `if is_encrypted()`
  54 +at the start of functions.
  55 +
  56 +.. seealso:: [MS-OFFCRYPTO]
  57 +.. seealso:: https://github.com/nolze/msoffcrypto-tool
  58 +
  59 +crypto is part of the python-oletools package:
  60 +http://www.decalage.info/python/oletools
  61 +"""
  62 +
  63 +# === LICENSE =================================================================
  64 +
  65 +# crypto is copyright (c) 2014-2019 Philippe Lagadec (http://www.decalage.info)
  66 +# All rights reserved.
  67 +#
  68 +# Redistribution and use in source and binary forms, with or without
  69 +# modification, are permitted provided that the following conditions are met:
  70 +#
  71 +# * Redistributions of source code must retain the above copyright notice,
  72 +# this list of conditions and the following disclaimer.
  73 +# * Redistributions in binary form must reproduce the above copyright notice,
  74 +# this list of conditions and the following disclaimer in the documentation
  75 +# and/or other materials provided with the distribution.
  76 +#
  77 +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  78 +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  79 +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  80 +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
  81 +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  82 +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  83 +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  84 +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  85 +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  86 +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  87 +# POSSIBILITY OF SUCH DAMAGE.
  88 +
  89 +# -----------------------------------------------------------------------------
  90 +# CHANGELOG:
  91 +# 2019-02-14 v0.01 CH: - first version with encryption check from oleid
  92 +
  93 +__version__ = '0.01'
  94 +
  95 +import sys
  96 +import struct
  97 +import os
  98 +from os.path import splitext, isfile
  99 +from tempfile import mkstemp
  100 +import zipfile
  101 +from oletools.common.errors import CryptoErrorBase, WrongEncryptionPassword, \
  102 + UnsupportedEncryptionError, MaxCryptoNestingReached, CryptoLibNotImported
  103 +from olefile import OleFileIO
  104 +
  105 +try:
  106 + import msoffcrypto
  107 +except ImportError:
  108 + msoffcrypto = None
  109 +
  110 +
  111 +#: if there is an encrypted file embedded in an encrypted file,
  112 +#: how deep down do we go
  113 +MAX_NESTING_DEPTH = 10
  114 +
  115 +
def is_encrypted(some_file):
    """
    Determine whether document contains encrypted content.

    This should return False for documents that are just write-protected or
    signed or finalized. It should return True if ANY content of the file is
    encrypted and can therefore not be analyzed by other oletools modules
    without being given a password.

    Exception: there are ways to write-protect an office document by embedding
    it as encrypted stream with hard-coded standard password into an otherwise
    empty OLE file. From an office user point of view, this is no encryption,
    but regarding file structure this is encryption, so we return `True` for
    these.

    This should not raise exceptions needlessly.

    Dispatches to :py:func:`is_encrypted_zip` for file names that
    :py:func:`zipfile.is_zipfile` recognizes, and to
    :py:func:`is_encrypted_ole` for everything else.

    :param some_file: File name or an opened OleFileIO
    :type some_file: :py:class:`olefile.OleFileIO` or `str`
    :returns: True if (and only if) the file contains encrypted content
    """
    # File names are `str`, or `unicode` on python 2; `type(u'')` names the
    # latter without referencing a builtin that does not exist on python 3.
    if not isinstance(some_file, (str, type(u''))):
        return is_encrypted_ole(some_file)   # assume it is OleFileIO
    if zipfile.is_zipfile(some_file):
        return is_encrypted_zip(some_file)

    # otherwise assume it is the name of an ole file. We opened it, so we
    # have to close it again (an OleFileIO given by the caller stays open).
    ole = OleFileIO(some_file)
    try:
        return is_encrypted_ole(ole)
    finally:
        ole.close()
  150 +
  151 +
def is_encrypted_zip(filename):
    """
    Specialization of :py:func:`is_encrypted` for zip-based files.

    Inspects the general-purpose flags of every archive entry; bit 0 is set
    for encrypted entries. No entry data is read or decompressed, and an
    archive without entries is reported as not encrypted.

    :param filename: name of a zip file (anything accepted by
                     :py:class:`zipfile.ZipFile`)
    :returns: True if at least one entry is flagged as encrypted
    :rtype: bool
    """
    with zipfile.ZipFile(filename, 'r') as zipper:
        return any(entry.flag_bits & 0x1 for entry in zipper.infolist())
  163 +
  164 +
def is_encrypted_ole(ole):
    """
    Specialization of :py:func:`is_encrypted` for ole files.

    Runs these checks independently of each other and returns True as soon
    as one of them indicates encryption:

    1. bit 0 of property 0x13 in the `\\x05SummaryInformation` stream
    2. existence of a stream `EncryptionInfo`
    3. existence of a stream `EncryptedSummary` without `SummaryInformation`
    4. flag 0x0100 in the 16bit value at offset 10 of stream `WordDocument`

    :param ole: opened ole file; only its methods `exists`, `getproperties`
                and `openstream` are used
    :type ole: :py:class:`olefile.OleFileIO`
    :returns: True if one of the checks indicates encryption
    :rtype: bool
    """
    # check well known property for password protection
    # (this field may be missing for Powerpoint2000, for example)
    # TODO: check whether password protection always implies encryption. Could
    #       write-protection or signing with password trigger this as well?
    if ole.exists("\x05SummaryInformation"):
        suminfo_data = ole.getproperties("\x05SummaryInformation")
        if 0x13 in suminfo_data and (suminfo_data[0x13] & 1):
            return True

    # check a few stream names. These must be checked even if the
    # SummaryInformation stream exists, so no `elif` here.
    # TODO: check whether these actually contain data and whether other
    #       necessary properties exist / are set
    if ole.exists('EncryptionInfo'):
        return True
    # or an encrypted ppt file
    # NOTE(review): this tests 'SummaryInformation' without the leading
    #               '\x05' used above -- confirm that this is intended
    if ole.exists('EncryptedSummary') and \
            not ole.exists('SummaryInformation'):
        return True

    # Word-specific old encryption:
    if ole.exists('WordDocument'):
        stream = ole.openstream(["WordDocument"])
        try:
            # 10 bytes of header followed by the 16bit flag structure
            header = stream.read(12)
        finally:
            stream.close()
        # a truncated stream has no flags to look at; do not raise for it
        if len(header) >= 12:
            # explicit little-endian, independent of the host byte order
            flags = struct.unpack("<H", header[10:12])[0]
            if flags & 0x0100:
                return True

    # no indication of encryption
    return False
  207 +
  208 +
  209 +#: one way to achieve "write protection" in office files is to encrypt the file
  210 +#: using this password
  211 +WRITE_PROTECT_ENCRYPTION_PASSWORD = 'VelvetSweatshop'
  212 +
  213 +
def _check_msoffcrypto():
    """
    Ensure that :py:mod:`msoffcrypto` is usable.

    :raises: :py:class:`CryptoLibNotImported` if the import at module level
             did not succeed
    """
    if msoffcrypto is not None:
        return
    raise CryptoLibNotImported()
  218 +
  219 +
def check_msoffcrypto():
    """
    Tell whether :py:mod:`msoffcrypto` could be imported.

    :returns: `True` if the module is available, `False` otherwise
    :rtype: bool
    """
    import_failed = msoffcrypto is None
    return not import_failed
  223 +
  224 +
def decrypt(filename, passwords=None, **temp_file_args):
    """
    Try to decrypt an encrypted file

    This function tries to decrypt the given file using a given set of
    passwords. If no password is given, tries the standard password for write
    protection. Creates a file with decrypted data whose file name is returned.
    If the decryption fails with all passwords, None is returned.

    The caller is responsible for deleting the returned file.

    :param str filename: path to an ole file on disc
    :param passwords: list/set/tuple/... of passwords or a single password or
                      None
    :type passwords: iterable or str or None
    :param temp_file_args: arguments for :py:func:`tempfile.mkstemp` e.g.,
                           `dir` or `prefix`. `suffix` will default to
                           suffix of input `filename`, `prefix` defaults to
                           `oletools-decrypt-`; `text` will be ignored
    :returns: name of the decrypted temporary file or None if no password
              could decrypt the file
    :raises: :py:class:`ImportError` if :py:mod:`msoffcrypto` could not be
             imported (package `msoffcrypto-tool`)
    :raises: :py:class:`UnsupportedEncryptionError` if msoffcrypto does not
             recognize the file format
    :raises: :py:class:`ValueError` if the given file is not encrypted
    """
    _check_msoffcrypto()

    # normalize password so we always have a list/tuple. A single password
    # may be `str` or -- on python 2 -- `unicode`; `type(u'')` covers the
    # latter without naming a builtin that is missing on python 3.
    if isinstance(passwords, (str, type(u''))):
        passwords = (passwords, )
    elif not passwords:
        passwords = (WRITE_PROTECT_ENCRYPTION_PASSWORD, )

    # check temp file args
    temp_file_args.setdefault('prefix', 'oletools-decrypt-')
    temp_file_args.setdefault('suffix', splitext(filename)[1])
    temp_file_args['text'] = False     # decrypted data is always binary

    decrypt_file = None
    with open(filename, 'rb') as reader:
        format_unknown = False
        try:
            crypto_file = msoffcrypto.OfficeFile(reader)
        except Exception as exc:   # e.g. ppt, not yet supported by msoffcrypto
            if 'Unrecognized file format' not in str(exc):
                raise
            format_unknown = True
        if format_unknown:
            # raised outside of the except clause, so the traceback of the
            # original exception is not attached (on python 2 and 3 alike)
            raise UnsupportedEncryptionError(filename)

        if not crypto_file.is_encrypted():
            raise ValueError('Given input file {} is not encrypted!'
                             .format(filename))

        for password in passwords:
            write_descriptor = None
            write_handle = None
            decrypt_file = None
            try:
                crypto_file.load_key(password=password)

                # create temp file
                write_descriptor, decrypt_file = mkstemp(**temp_file_args)
                write_handle = os.fdopen(write_descriptor, 'wb')
                write_descriptor = None      # is now handled via write_handle
                crypto_file.decrypt(write_handle)

                # decryption was successful; clean up and return
                write_handle.close()
                write_handle = None
                break
            except Exception:
                # This password did not work (best effort: any error counts).
                # error-clean up: close everything and del temp file
                if write_handle is not None:
                    write_handle.close()
                elif write_descriptor is not None:
                    os.close(write_descriptor)
                if decrypt_file and isfile(decrypt_file):
                    os.unlink(decrypt_file)
                decrypt_file = None

    # still None here if all passwords were tried without success
    return decrypt_file
... ...
oletools/msodde.py
... ... @@ -11,7 +11,6 @@ Supported formats:
11 11 - RTF
12 12 - CSV (exported from / imported into Excel)
13 13 - XML (exported from Word 2003, Word 2007+, Excel 2003, (Excel 2007+?)
14   -- raises an error if run with files encrypted using MS Crypto API RC4
15 14  
16 15 Author: Philippe Lagadec - http://www.decalage.info
17 16 License: BSD, see source code or documentation
... ... @@ -52,7 +51,6 @@ from __future__ import print_function
52 51  
53 52 import argparse
54 53 import os
55   -from os.path import abspath, dirname
56 54 import sys
57 55 import re
58 56 import csv
... ... @@ -62,9 +60,9 @@ import olefile
62 60 from oletools import ooxml
63 61 from oletools import xls_parser
64 62 from oletools import rtfobj
65   -from oletools import oleid
  63 +from oletools.ppt_record_parser import is_ppt
  64 +from oletools import crypto
66 65 from oletools.common.log_helper import log_helper
67   -from oletools.common.errors import FileIsEncryptedError
68 66  
69 67 # -----------------------------------------------------------------------------
70 68 # CHANGELOG:
... ... @@ -305,6 +303,9 @@ def process_args(cmd_line_args=None):
305 303 default=DEFAULT_LOG_LEVEL,
306 304 help="logging level debug/info/warning/error/critical "
307 305 "(default=%(default)s)")
  306 + parser.add_argument("-p", "--password", type=str, action='append',
  307 + help='if encrypted office files are encountered, try '
  308 + 'decryption with this password. May be repeated.')
308 309 filter_group = parser.add_argument_group(
309 310 title='Filter which OpenXML field commands are returned',
310 311 description='Only applies to OpenXML (e.g. docx) and rtf, not to OLE '
... ... @@ -352,10 +353,9 @@ def process_doc_field(data):
352 353  
353 354 if data.lstrip().lower().startswith(u'dde'):
354 355 return data
355   - elif data.lstrip().lower().startswith(u'\x00d\x00d\x00e\x00'):
  356 + if data.lstrip().lower().startswith(u'\x00d\x00d\x00e\x00'):
356 357 return data
357   - else:
358   - return u''
  358 + return u''
359 359  
360 360  
361 361 OLE_FIELD_START = 0x13
... ... @@ -379,7 +379,7 @@ def process_doc_stream(stream):
379 379 while True:
380 380 idx += 1
381 381 char = stream.read(1) # loop over every single byte
382   - if len(char) == 0:
  382 + if len(char) == 0: # pylint: disable=len-as-condition
383 383 break
384 384 else:
385 385 char = ord(char)
... ... @@ -417,7 +417,7 @@ def process_doc_stream(stream):
417 417 pass
418 418 elif len(field_contents) > OLE_FIELD_MAX_SIZE:
419 419 logger.debug('field exceeds max size of {0}. Ignore rest'
420   - .format(OLE_FIELD_MAX_SIZE))
  420 + .format(OLE_FIELD_MAX_SIZE))
421 421 max_size_exceeded = True
422 422  
423 423 # appending a raw byte to a unicode string here. Not clean but
... ... @@ -437,7 +437,7 @@ def process_doc_stream(stream):
437 437 logger.debug('big field was not a field after all')
438 438  
439 439 logger.debug('Checked {0} characters, found {1} fields'
440   - .format(idx, len(result_parts)))
  440 + .format(idx, len(result_parts)))
441 441  
442 442 return result_parts
443 443  
... ... @@ -462,11 +462,10 @@ def process_doc(ole):
462 462 direntry = ole._load_direntry(sid)
463 463 is_stream = direntry.entry_type == olefile.STGTY_STREAM
464 464 logger.debug('direntry {:2d} {}: {}'
465   - .format(sid, '[orphan]' if is_orphan else direntry.name,
466   - 'is stream of size {}'.format(direntry.size)
467   - if is_stream else
468   - 'no stream ({})'
469   - .format(direntry.entry_type)))
  465 + .format(sid, '[orphan]' if is_orphan else direntry.name,
  466 + 'is stream of size {}'.format(direntry.size)
  467 + if is_stream else
  468 + 'no stream ({})'.format(direntry.entry_type)))
470 469 if is_stream:
471 470 new_parts = process_doc_stream(
472 471 ole._open(direntry.isectStart, direntry.size))
... ... @@ -525,7 +524,8 @@ def process_docx(filepath, field_filter_mode=None):
525 524 else:
526 525 elem = curr_elem
527 526 if elem is None:
528   - raise BadOOXML(filepath, 'Got "None"-Element from iter_xml')
  527 + raise ooxml.BadOOXML(filepath,
  528 + 'Got "None"-Element from iter_xml')
529 529  
530 530 # check if FLDCHARTYPE and whether "begin" or "end" tag
531 531 attrib_type = elem.attrib.get(ATTR_W_FLDCHARTYPE[0]) or \
... ... @@ -535,7 +535,7 @@ def process_docx(filepath, field_filter_mode=None):
535 535 level += 1
536 536 if attrib_type == "end":
537 537 level -= 1
538   - if level == 0 or level == -1: # edge-case; level gets -1
  538 + if level in (0, -1): # edge-case; level gets -1
539 539 all_fields.append(ddetext)
540 540 ddetext = u''
541 541 level = 0 # reset edge-case
... ... @@ -564,6 +564,7 @@ def process_docx(filepath, field_filter_mode=None):
564 564  
565 565  
566 566 def unquote(field):
  567 + """TODO: document what exactly is happening here..."""
567 568 if "QUOTE" not in field or NO_QUOTES:
568 569 return field
569 570 # split into components
... ... @@ -606,7 +607,7 @@ def field_is_blacklisted(contents):
606 607 except ValueError: # first word is no blacklisted command
607 608 return False
608 609 logger.debug('trying to match "{0}" to blacklist command {1}'
609   - .format(contents, FIELD_BLACKLIST[index]))
  610 + .format(contents, FIELD_BLACKLIST[index]))
610 611 _, nargs_required, nargs_optional, sw_with_arg, sw_solo, sw_format \
611 612 = FIELD_BLACKLIST[index]
612 613  
... ... @@ -618,11 +619,12 @@ def field_is_blacklisted(contents):
618 619 nargs += 1
619 620 if nargs < nargs_required:
620 621 logger.debug('too few args: found {0}, but need at least {1} in "{2}"'
621   - .format(nargs, nargs_required, contents))
  622 + .format(nargs, nargs_required, contents))
622 623 return False
623   - elif nargs > nargs_required + nargs_optional:
624   - logger.debug('too many args: found {0}, but need at most {1}+{2} in "{3}"'
625   - .format(nargs, nargs_required, nargs_optional, contents))
  624 + if nargs > nargs_required + nargs_optional:
  625 + logger.debug('too many args: found {0}, but need at most {1}+{2} in '
  626 + '"{3}"'
  627 + .format(nargs, nargs_required, nargs_optional, contents))
626 628 return False
627 629  
628 630 # check switches
... ... @@ -632,14 +634,14 @@ def field_is_blacklisted(contents):
632 634 if expect_arg: # this is an argument for the last switch
633 635 if arg_choices and (word not in arg_choices):
634 636 logger.debug('Found invalid switch argument "{0}" in "{1}"'
635   - .format(word, contents))
  637 + .format(word, contents))
636 638 return False
637 639 expect_arg = False
638 640 arg_choices = [] # in general, do not enforce choices
639 641 continue # "no further questions, your honor"
640 642 elif not FIELD_SWITCH_REGEX.match(word):
641 643 logger.debug('expected switch, found "{0}" in "{1}"'
642   - .format(word, contents))
  644 + .format(word, contents))
643 645 return False
644 646 # we want a switch and we got a valid one
645 647 switch = word[1]
... ... @@ -661,7 +663,7 @@ def field_is_blacklisted(contents):
661 663 arg_choices = [] # too many choices to list them here
662 664 else:
663 665 logger.debug('unexpected switch {0} in "{1}"'
664   - .format(switch, contents))
  666 + .format(switch, contents))
665 667 return False
666 668  
667 669 # if nothing went wrong sofar, the contents seems to match the blacklist
... ... @@ -676,7 +678,7 @@ def process_xlsx(filepath):
676 678 tag = elem.tag.lower()
677 679 if tag == 'ddelink' or tag.endswith('}ddelink'):
678 680 # we have found a dde link. Try to get more info about it
679   - link_info = ['DDE-Link']
  681 + link_info = []
680 682 if 'ddeService' in elem.attrib:
681 683 link_info.append(elem.attrib['ddeService'])
682 684 if 'ddeTopic' in elem.attrib:
... ... @@ -687,16 +689,15 @@ def process_xlsx(filepath):
687 689 for subfile, content_type, handle in parser.iter_non_xml():
688 690 try:
689 691 logger.info('Parsing non-xml subfile {0} with content type {1}'
690   - .format(subfile, content_type))
  692 + .format(subfile, content_type))
691 693 for record in xls_parser.parse_xlsb_part(handle, content_type,
692 694 subfile):
693 695 logger.debug('{0}: {1}'.format(subfile, record))
694 696 if isinstance(record, xls_parser.XlsbBeginSupBook) and \
695 697 record.link_type == \
696 698 xls_parser.XlsbBeginSupBook.LINK_TYPE_DDE:
697   - dde_links.append('DDE-Link ' + record.string1 + ' ' +
698   - record.string2)
699   - except Exception:
  699 + dde_links.append(record.string1 + ' ' + record.string2)
  700 + except Exception as exc:
700 701 if content_type.startswith('application/vnd.ms-excel.') or \
701 702 content_type.startswith('application/vnd.ms-office.'): # pylint: disable=bad-indentation
702 703 # should really be able to parse these either as xml or records
... ... @@ -727,7 +728,8 @@ class RtfFieldParser(rtfobj.RtfParser):
727 728  
728 729 def open_destination(self, destination):
729 730 if destination.cword == b'fldinst':
730   - logger.debug('*** Start field data at index %Xh' % destination.start)
  731 + logger.debug('*** Start field data at index %Xh'
  732 + % destination.start)
731 733  
732 734 def close_destination(self, destination):
733 735 if destination.cword == b'fldinst':
... ... @@ -758,7 +760,7 @@ def process_rtf(file_handle, field_filter_mode=None):
758 760 all_fields = [field.decode('ascii') for field in rtfparser.fields]
759 761 # apply field command filter
760 762 logger.debug('found {1} fields, filtering with mode "{0}"'
761   - .format(field_filter_mode, len(all_fields)))
  763 + .format(field_filter_mode, len(all_fields)))
762 764 if field_filter_mode in (FIELD_FILTER_ALL, None):
763 765 clean_fields = all_fields
764 766 elif field_filter_mode == FIELD_FILTER_DDE:
... ... @@ -815,11 +817,12 @@ def process_csv(filepath):
815 817 results, _ = process_csv_dialect(file_handle, delim)
816 818 except csv.Error: # e.g. sniffing fails
817 819 logger.debug('failed to csv-parse with delimiter {0!r}'
818   - .format(delim))
  820 + .format(delim))
819 821  
820 822 if is_small and not results:
821 823 # try whole file as single cell, since sniffing fails in this case
822   - logger.debug('last attempt: take whole file as single unquoted cell')
  824 + logger.debug('last attempt: take whole file as single unquoted '
  825 + 'cell')
823 826 file_handle.seek(0)
824 827 match = CSV_DDE_FORMAT.match(file_handle.read(CSV_SMALL_THRESH))
825 828 if match:
... ... @@ -836,8 +839,8 @@ def process_csv_dialect(file_handle, delimiters):
836 839 delimiters=delimiters)
837 840 dialect.strict = False # microsoft is never strict
838 841 logger.debug('sniffed csv dialect with delimiter {0!r} '
839   - 'and quote char {1!r}'
840   - .format(dialect.delimiter, dialect.quotechar))
  842 + 'and quote char {1!r}'
  843 + .format(dialect.delimiter, dialect.quotechar))
841 844  
842 845 # rewind file handle to start
843 846 file_handle.seek(0)
... ... @@ -892,19 +895,12 @@ def process_file(filepath, field_filter_mode=None):
892 895 logger.debug('Process file as excel 2003 (xls)')
893 896 return process_xls(filepath)
894 897  
895   - # encrypted files also look like ole, even if office 2007+ (xml-based)
896   - # so check for encryption, first
897 898 ole = olefile.OleFileIO(filepath, path_encoding=None)
898   - oid = oleid.OleID(ole)
899   - if oid.check_encrypted().value:
900   - log.debug('is encrypted - raise error')
901   - raise FileIsEncryptedError(filepath)
902   - elif oid.check_powerpoint().value:
903   - log.debug('is ppt - cannot have DDE')
  899 + if is_ppt(ole):
  900 + logger.debug('is ppt - cannot have DDE')
904 901 return u''
905   - else:
906   - logger.debug('Process file as word 2003 (doc)')
907   - return process_doc(ole)
  902 + logger.debug('Process file as word 2003 (doc)')
  903 + return process_doc(ole)
908 904  
909 905 with open(filepath, 'rb') as file_handle:
910 906 if file_handle.read(4) == RTF_START:
... ... @@ -921,22 +917,73 @@ def process_file(filepath, field_filter_mode=None):
921 917 if doctype == ooxml.DOCTYPE_EXCEL:
922 918 logger.debug('Process file as excel 2007+ (xlsx)')
923 919 return process_xlsx(filepath)
924   - elif doctype in (ooxml.DOCTYPE_EXCEL_XML, ooxml.DOCTYPE_EXCEL_XML2003):
  920 + if doctype in (ooxml.DOCTYPE_EXCEL_XML, ooxml.DOCTYPE_EXCEL_XML2003):
925 921 logger.debug('Process file as xml from excel 2003/2007+')
926 922 return process_excel_xml(filepath)
927   - elif doctype in (ooxml.DOCTYPE_WORD_XML, ooxml.DOCTYPE_WORD_XML2003):
  923 + if doctype in (ooxml.DOCTYPE_WORD_XML, ooxml.DOCTYPE_WORD_XML2003):
928 924 logger.debug('Process file as xml from word 2003/2007+')
929 925 return process_docx(filepath)
930   - elif doctype is None:
  926 + if doctype is None:
931 927 logger.debug('Process file as csv')
932 928 return process_csv(filepath)
933   - else: # could be docx; if not: this is the old default code path
934   - logger.debug('Process file as word 2007+ (docx)')
935   - return process_docx(filepath, field_filter_mode)
  929 + # could be docx; if not: this is the old default code path
  930 + logger.debug('Process file as word 2007+ (docx)')
  931 + return process_docx(filepath, field_filter_mode)
936 932  
937 933  
938 934 # === MAIN =================================================================
939 935  
  936 +
def process_maybe_encrypted(filepath, passwords=None, crypto_nesting=0,
                            **kwargs):
    """
    Process a file that might be encrypted.

    Calls :py:func:`process_file` and if that fails tries to decrypt and
    process the result. Based on recommendation in module doc string of
    :py:mod:`oletools.crypto`.

    The standard write-protection password is always tried in addition to
    the given passwords. The temporary file with decrypted data is deleted
    before this function returns.

    :param str filepath: path to file on disc.
    :param passwords: list of passwords (str) to try for decryption or None
    :param int crypto_nesting: How many decryption layers were already used to
                               get the given file.
    :param kwargs: same as :py:func:`process_file`
    :returns: same as :py:func:`process_file`
    :raises: :py:class:`oletools.common.errors.MaxCryptoNestingReached` if
             `crypto_nesting` has reached `crypto.MAX_NESTING_DEPTH`
    :raises: :py:class:`oletools.common.errors.WrongEncryptionPassword` if
             none of the passwords could decrypt the file
    """
    result = u''
    try:
        result = process_file(filepath, **kwargs)
        if not crypto.is_encrypted(filepath):
            return result
    except Exception:
        # errors are only expected (and ignored) for encrypted files
        if not crypto.is_encrypted(filepath):
            raise

    # we reach this point only if file is encrypted
    # check if this is an encrypted file in an encrypted file in an ...
    if crypto_nesting >= crypto.MAX_NESTING_DEPTH:
        raise crypto.MaxCryptoNestingReached(crypto_nesting, filepath)

    # Always try the write-protection password, too. Add it only once, so
    # the list does not grow with every level of nesting.
    passwords = [] if passwords is None else list(passwords)
    if crypto.WRITE_PROTECT_ENCRYPTION_PASSWORD not in passwords:
        passwords.append(crypto.WRITE_PROTECT_ENCRYPTION_PASSWORD)

    decrypted_file = None
    try:
        logger.debug('Trying to decrypt file')
        decrypted_file = crypto.decrypt(filepath, passwords)
        if not decrypted_file:
            # decrypt() returns None if no password fits. Report that
            # instead of failing later with an unrelated error.
            raise crypto.WrongEncryptionPassword(filepath)
        logger.info('Analyze decrypted file')
        result = process_maybe_encrypted(decrypted_file, passwords,
                                         crypto_nesting+1, **kwargs)
    finally:     # clean up
        if decrypted_file is not None:    # (maybe file was not yet created)
            try:
                os.unlink(decrypted_file)
            except OSError:
                pass
    return result
  985 +
  986 +
940 987 def main(cmd_line_args=None):
941 988 """ Main function, called if this file is called as a script
942 989  
... ... @@ -961,10 +1008,12 @@ def main(cmd_line_args=None):
961 1008 text = ''
962 1009 return_code = 1
963 1010 try:
964   - text = process_file(args.filepath, args.field_filter_mode)
  1011 + text = process_maybe_encrypted(
  1012 + args.filepath, args.password,
  1013 + field_filter_mode=args.field_filter_mode)
965 1014 return_code = 0
966 1015 except Exception as exc:
967   - logger.exception(exc.message)
  1016 + logger.exception(str(exc))
968 1017  
969 1018 logger.print_str('DDE Links:')
970 1019 logger.print_str(text)
... ...
oletools/oleid.py
... ... @@ -93,6 +93,7 @@ except ImportError:
93 93 sys.path.insert(0, PARENT_DIR)
94 94 del PARENT_DIR
95 95 from oletools.thirdparty.prettytable import prettytable
  96 +from oletools import crypto
96 97  
97 98 import olefile
98 99  
... ... @@ -279,20 +280,7 @@ class OleID(object):
279 280 self.indicators.append(encrypted)
280 281 if not self.ole:
281 282 return None
282   - # check if bit 1 of security field = 1:
283   - # (this field may be missing for Powerpoint2000, for example)
284   - if self.suminfo_data is None:
285   - self.check_properties()
286   - if 0x13 in self.suminfo_data:
287   - if self.suminfo_data[0x13] & 1:
288   - encrypted.value = True
289   - # check if this is an OpenXML encrypted file
290   - elif self.ole.exists('EncryptionInfo'):
291   - encrypted.value = True
292   - # or an encrypted ppt file
293   - if self.ole.exists('EncryptedSummary') and \
294   - not self.ole.exists('SummaryInformation'):
295   - encrypted.value = True
  283 + encrypted.value = crypto.is_encrypted(self.ole)
296 284 return encrypted
297 285  
298 286 def check_word(self):
... ... @@ -316,27 +304,7 @@ class OleID(object):
316 304 return None, None
317 305 if self.ole.exists('WordDocument'):
318 306 word.value = True
319   - # check for Word-specific encryption flag:
320   - stream = None
321   - try:
322   - stream = self.ole.openstream(["WordDocument"])
323   - # pass header 10 bytes
324   - stream.read(10)
325   - # read flag structure:
326   - temp16 = struct.unpack("H", stream.read(2))[0]
327   - f_encrypted = (temp16 & 0x0100) >> 8
328   - if f_encrypted:
329   - # correct encrypted indicator if present or add one
330   - encrypt_ind = self.get_indicator('encrypted')
331   - if encrypt_ind:
332   - encrypt_ind.value = True
333   - else:
334   - self.indicators.append('encrypted', True, name='Encrypted')
335   - except Exception:
336   - raise
337   - finally:
338   - if stream is not None:
339   - stream.close()
  307 +
340 308 # check for VBA macros:
341 309 if self.ole.exists('Macros'):
342 310 macros.value = True
... ...
oletools/olevba.py
... ... @@ -312,8 +312,7 @@ from pyparsing import \
312 312 from oletools import ppt_parser
313 313 from oletools import oleform
314 314 from oletools import rtfobj
315   -from oletools import oleid
316   -from oletools.common.errors import FileIsEncryptedError
  315 +from oletools import crypto
317 316 from oletools.common import codepages
318 317  
319 318 # monkeypatch email to fix issue #32:
... ... @@ -2585,12 +2584,6 @@ class VBA_Parser(object):
2585 2584 # This looks like an OLE file
2586 2585 self.open_ole(_file)
2587 2586  
2588   - # check whether file is encrypted (need to do this before try ppt)
2589   - log.debug('Check encryption of ole file')
2590   - crypt_indicator = oleid.OleID(self.ole_file).check_encrypted()
2591   - if crypt_indicator.value:
2592   - raise FileIsEncryptedError(filename)
2593   -
2594 2587 # if this worked, try whether it is a ppt file (special ole file)
2595 2588 self.open_ppt()
2596 2589 if self.type is None and zipfile.is_zipfile(_file):
... ... @@ -3741,6 +3734,10 @@ def parse_args(cmd_line_args=None):
3741 3734 help='find files recursively in subdirectories.')
3742 3735 parser.add_option("-z", "--zip", dest='zip_password', type='str', default=None,
3743 3736 help='if the file is a zip archive, open all files from it, using the provided password.')
  3737 + parser.add_option("-p", "--password", type='str', action='append',
  3738 + default=[],
  3739 + help='if encrypted office files are encountered, try '
  3740 + 'decryption with this password. May be repeated.')
3744 3741 parser.add_option("-f", "--zipfname", dest='zip_fname', type='str', default='*',
3745 3742 help='if the file is a zip archive, file(s) to be opened within the zip. Wildcards * and ? are supported. (default:*)')
3746 3743 # output mode; could make this even simpler with add_option(type='choice') but that would make
... ... @@ -3790,6 +3787,106 @@ def parse_args(cmd_line_args=None):
3790 3787 return options, args
3791 3788  
3792 3789  
def process_file(filename, data, container, options, crypto_nesting=0):
    """
    Part of main function that processes a single file.

    This handles exceptions and encryption: the file is analyzed with a
    VBA_Parser_CLI in the requested output mode; if it turns out to be
    encrypted, it is decrypted to a temporary file which is then analyzed by
    a recursive call to this function.

    :param filename: name of the file to analyze; also handed to
                     crypto.is_encrypted and crypto.decrypt
    :param data: file contents, forwarded to VBA_Parser_CLI (may be None)
    :param container: name of the container the file came from, or None
    :param options: parsed command line options (uses output_mode, relaxed,
                    password and the display/deobfuscation switches)
    :param crypto_nesting: number of decryption layers already removed
    :returns: a single code summarizing the status of processing of this
              file (RETURN_OK, RETURN_OPEN_ERROR or RETURN_PARSE_ERROR)
    :raises crypto.MaxCryptoNestingReached: if crypto_nesting has reached
            crypto.MAX_NESTING_DEPTH and the file is still encrypted
    :raises crypto.WrongEncryptionPassword: if crypto.decrypt returned
            nothing for all tried passwords
    :raises Exception: any other error of an unencrypted file that is not
            handled here is re-raised for the caller
    """
    vba_parser = None
    try:
        # Open the file
        vba_parser = VBA_Parser_CLI(filename, data=data, container=container,
                                    relaxed=options.relaxed)

        if options.output_mode == 'detailed':
            # fully detailed output
            vba_parser.process_file(
                show_decoded_strings=options.show_decoded_strings,
                display_code=options.display_code,
                hide_attributes=options.hide_attributes,
                vba_code_only=options.vba_code_only,
                show_deobfuscated_code=options.show_deobfuscated_code,
                deobfuscate=options.deobfuscate)
        elif options.output_mode == 'triage':
            # summarized output for triage:
            vba_parser.process_file_triage(
                show_decoded_strings=options.show_decoded_strings,
                deobfuscate=options.deobfuscate)
        elif options.output_mode == 'json':
            print_json(
                vba_parser.process_file_json(
                    show_decoded_strings=options.show_decoded_strings,
                    display_code=options.display_code,
                    hide_attributes=options.hide_attributes,
                    vba_code_only=options.vba_code_only,
                    show_deobfuscated_code=options.show_deobfuscated_code,
                    deobfuscate=options.deobfuscate))
        else:  # (should be impossible)
            raise ValueError('unexpected output mode: "{0}"!'
                             .format(options.output_mode))

        # even if processing succeeds, file might still be encrypted
        log.debug('Checking for encryption')
        if not crypto.is_encrypted(filename):
            return RETURN_OK
    except Exception as exc:
        log.debug('Checking for encryption')
        if not crypto.is_encrypted(filename):
            # a genuine error of an unencrypted file: report it in the
            # format of the current output mode and return an error code
            if isinstance(exc, (SubstreamOpenError, UnexpectedDataError)):
                if options.output_mode in ('triage', 'unspecified'):
                    print('%-12s %s - Error opening substream or uenxpected ' \
                          'content' % ('?', filename))
                elif options.output_mode == 'json':
                    print_json(file=filename, type='error',
                               error=type(exc).__name__, message=str(exc))
                else:
                    log.exception('Error opening substream or unexpected '
                                  'content in %s' % filename)
                return RETURN_OPEN_ERROR
            if isinstance(exc, FileOpenError):
                if options.output_mode in ('triage', 'unspecified'):
                    print('%-12s %s - File format not supported'
                          % ('?', filename))
                elif options.output_mode == 'json':
                    print_json(file=filename, type='error',
                               error=type(exc).__name__, message=str(exc))
                else:
                    log.exception('Failed to open %s -- probably not supported!'
                                  % filename)
                return RETURN_OPEN_ERROR
            if isinstance(exc, ProcessingError):
                if options.output_mode in ('triage', 'unspecified'):
                    print('%-12s %s - %s'
                          % ('!ERROR', filename, exc.orig_exc))
                elif options.output_mode == 'json':
                    print_json(file=filename, type='error',
                               error=type(exc).__name__,
                               message=str(exc.orig_exc))
                else:
                    log.exception('Error processing file %s (%s)!'
                                  % (filename, exc.orig_exc))
                return RETURN_PARSE_ERROR
            raise  # let caller deal with this
        # file is encrypted: ignore the error and deal with this below
    finally:
        # release the parser again; it is not needed beyond this point and
        # would otherwise stay open for every processed file
        if vba_parser is not None:
            vba_parser.close()

    # we reach this point only if file is encrypted
    # check if this is an encrypted file in an encrypted file in an ...
    if crypto_nesting >= crypto.MAX_NESTING_DEPTH:
        raise crypto.MaxCryptoNestingReached(crypto_nesting, filename)

    decrypted_file = None
    try:
        log.debug('Checking encryption passwords {}'.format(options.password))
        passwords = options.password + \
            [crypto.WRITE_PROTECT_ENCRYPTION_PASSWORD, ]
        decrypted_file = crypto.decrypt(filename, passwords)
        if not decrypted_file:
            raise crypto.WrongEncryptionPassword(filename)
        log.info('Working on decrypted file')
        # NOTE(review): data is forwarded unchanged although it belongs to
        # the still-encrypted file; presumably it is None whenever a file
        # can be decrypted from disk -- confirm against VBA_Parser
        return process_file(decrypted_file, data, container or filename,
                            options, crypto_nesting+1)
    finally:  # clean up
        if decrypted_file is not None and os.path.isfile(decrypted_file):
            os.unlink(decrypted_file)
  3888 +
  3889 +
3793 3890 def main(cmd_line_args=None):
3794 3891 """
3795 3892 Main function, called when olevba is run from the command line
... ... @@ -3824,35 +3921,44 @@ def main(cmd_line_args=None):
3824 3921 if options.output_mode == 'triage' and options.show_deobfuscated_code:
3825 3922 log.info('ignoring option --reveal in triage output mode')
3826 3923  
3827   - # Column headers (do not know how many files there will be yet, so if no output_mode
3828   - # was specified, we will print triage for first file --> need these headers)
3829   - if options.output_mode in ('triage', 'unspecified'):
  3924 + # gather info on all files that must be processed
  3925 + # ignore directory names stored in zip files:
  3926 + all_input_info = tuple((container, filename, data) for
  3927 + container, filename, data in xglob.iter_files(
  3928 + args, recursive=options.recursive,
  3929 + zip_password=options.zip_password,
  3930 + zip_fname=options.zip_fname)
  3931 + if not (container and filename.endswith('/')))
  3932 +
  3933 + # specify output mode if options -t, -d and -j were not specified
  3934 + if options.output_mode == 'unspecified':
  3935 + if len(all_input_info) == 1:
  3936 + options.output_mode = 'detailed'
  3937 + else:
  3938 + options.output_mode = 'triage'
  3939 +
  3940 + # Column headers for triage mode
  3941 + if options.output_mode == 'triage':
3830 3942 print('%-12s %-65s' % ('Flags', 'Filename'))
3831 3943 print('%-12s %-65s' % ('-' * 11, '-' * 65))
3832 3944  
3833 3945 previous_container = None
3834 3946 count = 0
3835 3947 container = filename = data = None
3836   - vba_parser = None
3837 3948 return_code = RETURN_OK
3838 3949 try:
3839   - for container, filename, data in xglob.iter_files(args, recursive=options.recursive,
3840   - zip_password=options.zip_password, zip_fname=options.zip_fname):
3841   - # ignore directory names stored in zip files:
3842   - if container and filename.endswith('/'):
3843   - continue
3844   -
  3950 + for container, filename, data in all_input_info:
3845 3951 # handle errors from xglob
3846 3952 if isinstance(data, Exception):
3847 3953 if isinstance(data, PathNotFoundException):
3848   - if options.output_mode in ('triage', 'unspecified'):
  3954 + if options.output_mode == 'triage':
3849 3955 print('%-12s %s - File not found' % ('?', filename))
3850 3956 elif options.output_mode != 'json':
3851 3957 log.error('Given path %r does not exist!' % filename)
3852 3958 return_code = RETURN_FILE_NOT_FOUND if return_code == 0 \
3853 3959 else RETURN_SEVERAL_ERRS
3854 3960 else:
3855   - if options.output_mode in ('triage', 'unspecified'):
  3961 + if options.output_mode == 'triage':
3856 3962 print('%-12s %s - Failed to read from zip file %s' % ('?', filename, container))
3857 3963 elif options.output_mode != 'json':
3858 3964 log.error('Exception opening/reading %r from zip file %r: %s'
... ... @@ -3864,107 +3970,42 @@ def main(cmd_line_args=None):
3864 3970 error=type(data).__name__, message=str(data))
3865 3971 continue
3866 3972  
3867   - try:
3868   - # close the previous file if analyzing several:
3869   - # (this must be done here to avoid closing the file if there is only 1,
3870   - # to fix issue #219)
3871   - if vba_parser is not None:
3872   - vba_parser.close()
3873   - # Open the file
3874   - vba_parser = VBA_Parser_CLI(filename, data=data, container=container,
3875   - relaxed=options.relaxed)
3876   -
3877   - if options.output_mode == 'detailed':
3878   - # fully detailed output
3879   - vba_parser.process_file(show_decoded_strings=options.show_decoded_strings,
3880   - display_code=options.display_code,
3881   - hide_attributes=options.hide_attributes, vba_code_only=options.vba_code_only,
3882   - show_deobfuscated_code=options.show_deobfuscated_code,
3883   - deobfuscate=options.deobfuscate)
3884   - elif options.output_mode in ('triage', 'unspecified'):
3885   - # print container name when it changes:
3886   - if container != previous_container:
3887   - if container is not None:
3888   - print('\nFiles in %s:' % container)
3889   - previous_container = container
3890   - # summarized output for triage:
3891   - vba_parser.process_file_triage(show_decoded_strings=options.show_decoded_strings,
3892   - deobfuscate=options.deobfuscate)
3893   - elif options.output_mode == 'json':
3894   - print_json(
3895   - vba_parser.process_file_json(show_decoded_strings=options.show_decoded_strings,
3896   - display_code=options.display_code,
3897   - hide_attributes=options.hide_attributes, vba_code_only=options.vba_code_only,
3898   - show_deobfuscated_code=options.show_deobfuscated_code,
3899   - deobfuscate=options.deobfuscate))
3900   - else: # (should be impossible)
3901   - raise ValueError('unexpected output mode: "{0}"!'.format(options.output_mode))
3902   - count += 1
3903   -
3904   - except (SubstreamOpenError, UnexpectedDataError) as exc:
3905   - if options.output_mode in ('triage', 'unspecified'):
3906   - print('%-12s %s - Error opening substream or uenxpected ' \
3907   - 'content' % ('?', filename))
3908   - elif options.output_mode == 'json':
3909   - print_json(file=filename, type='error',
3910   - error=type(exc).__name__, message=str(exc))
3911   - else:
3912   - log.exception('Error opening substream or unexpected '
3913   - 'content in %s' % filename)
3914   - return_code = RETURN_OPEN_ERROR if return_code == 0 \
3915   - else RETURN_SEVERAL_ERRS
3916   - except FileOpenError as exc:
3917   - if options.output_mode in ('triage', 'unspecified'):
3918   - print('%-12s %s - File format not supported' % ('?', filename))
3919   - elif options.output_mode == 'json':
3920   - print_json(file=filename, type='error',
3921   - error=type(exc).__name__, message=str(exc))
3922   - else:
3923   - log.exception('Failed to open %s -- probably not supported!' % filename)
3924   - return_code = RETURN_OPEN_ERROR if return_code == 0 \
3925   - else RETURN_SEVERAL_ERRS
3926   - except ProcessingError as exc:
3927   - if options.output_mode in ('triage', 'unspecified'):
3928   - print('%-12s %s - %s' % ('!ERROR', filename, exc.orig_exc))
3929   - elif options.output_mode == 'json':
3930   - print_json(file=filename, type='error',
3931   - error=type(exc).__name__,
3932   - message=str(exc.orig_exc))
3933   - else:
3934   - log.exception('Error processing file %s (%s)!'
3935   - % (filename, exc.orig_exc))
3936   - return_code = RETURN_PARSE_ERROR if return_code == 0 \
3937   - else RETURN_SEVERAL_ERRS
3938   - except FileIsEncryptedError as exc:
3939   - if options.output_mode in ('triage', 'unspecified'):
3940   - print('%-12s %s - File is encrypted' % ('!ERROR', filename))
3941   - elif options.output_mode == 'json':
3942   - print_json(file=filename, type='error',
3943   - error=type(exc).__name__, message=str(exc))
3944   - else:
3945   - log.exception('File %s is encrypted!' % (filename))
3946   - return_code = RETURN_ENCRYPTED if return_code == 0 \
3947   - else RETURN_SEVERAL_ERRS
3948   - # Here we do not close the vba_parser, because process_file may need it below.
  3973 + if options.output_mode == 'triage':
  3974 + # print container name when it changes:
  3975 + if container != previous_container:
  3976 + if container is not None:
  3977 + print('\nFiles in %s:' % container)
  3978 + previous_container = container
  3979 +
  3980 + # process the file, handling errors and encryption
  3981 + curr_return_code = process_file(filename, data, container, options)
  3982 + count += 1
  3983 +
  3984 + # adjust overall return code
  3985 + if curr_return_code == RETURN_OK:
  3986 + continue # do not modify overall return code
  3987 + if return_code == RETURN_OK:
  3988 + return_code = curr_return_code # first error return code
  3989 + else:
  3990 + return_code = RETURN_SEVERAL_ERRS # several errors
3949 3991  
3950 3992 if options.output_mode == 'triage':
3951 3993 print('\n(Flags: OpX=OpenXML, XML=Word2003XML, FlX=FlatOPC XML, MHT=MHTML, TXT=Text, M=Macros, ' \
3952 3994 'A=Auto-executable, S=Suspicious keywords, I=IOCs, H=Hex strings, ' \
3953 3995 'B=Base64 strings, D=Dridex strings, V=VBA strings, ?=Unknown)\n')
3954 3996  
3955   - if count == 1 and options.output_mode == 'unspecified':
3956   - # if options -t, -d and -j were not specified and it's a single file, print details:
3957   - vba_parser.process_file(show_decoded_strings=options.show_decoded_strings,
3958   - display_code=options.display_code,
3959   - hide_attributes=options.hide_attributes, vba_code_only=options.vba_code_only,
3960   - show_deobfuscated_code=options.show_deobfuscated_code,
3961   - deobfuscate=options.deobfuscate)
3962   -
3963 3997 if options.output_mode == 'json':
3964 3998 # print last json entry (a last one without a comma) and closing ]
3965 3999 print_json(type='MetaInformation', return_code=return_code,
3966 4000 n_processed=count, _json_is_last=True)
3967 4001  
  4002 + except crypto.CryptoErrorBase as exc:
  4003 + log.exception('Problems with encryption in main: {}'.format(exc),
  4004 + exc_info=True)
  4005 + if return_code == RETURN_OK:
  4006 + return_code = RETURN_ENCRYPTED
  4007 + else:
  4008 + return_code == RETURN_SEVERAL_ERRS
3968 4009 except Exception as exc:
3969 4010 # some unexpected error, maybe some of the types caught in except clauses
3970 4011 # above were not sufficient. This is very bad, so log complete trace at exception level
... ...
oletools/ppt_record_parser.py
... ... @@ -63,7 +63,7 @@ except ImportError:
63 63 sys.path.insert(0, PARENT_DIR)
64 64 del PARENT_DIR
65 65 from oletools import record_base
66   -from oletools.common.errors import FileIsEncryptedError
  66 +from oletools.common.errors import CryptoErrorBase
67 67  
68 68  
69 69 # types of relevant records (there are much more than listed here)
... ... @@ -149,6 +149,10 @@ def is_ppt(filename):
149 149 Param filename can be anything that OleFileIO constructor accepts: name of
150 150 file or file data or data stream.
151 151  
  152 + Will not try to decrypt the file, nor even try to determine whether it
  153 + is encrypted. If the file is encrypted, will either raise an error or
  154 + just return `False`.
  155 +
152 156 see also: oleid.OleID.check_powerpoint
153 157 """
154 158 have_current_user = False
... ... @@ -181,11 +185,8 @@ def is_ppt(filename):
181 185 return True
182 186 else: # ignore other streams/storages since they are optional
183 187 continue
184   - except FileIsEncryptedError:
185   - assert ppt_file is not None, \
186   - 'Encryption error should not be raised from just opening OLE file.'
187   - # just rely on stream names, copied from oleid
188   - return ppt_file.exists('PowerPoint Document')
  188 + except CryptoErrorBase:
  189 + raise
189 190 except Exception:
190 191 pass
191 192 return False
... ...
oletools/record_base.py
... ... @@ -74,7 +74,6 @@ PARENT_DIR = os.path.normpath(os.path.dirname(os.path.dirname(
74 74 if PARENT_DIR not in sys.path:
75 75 sys.path.insert(0, PARENT_DIR)
76 76 del PARENT_DIR
77   -from oletools.common.errors import FileIsEncryptedError
78 77 from oletools import oleid
79 78  
80 79  
... ... @@ -127,10 +126,9 @@ class OleRecordFile(olefile.OleFileIO):
127 126 """
128 127  
129 128 def open(self, filename, *args, **kwargs):
130   - """Call OleFileIO.open, raise error if is encrypted."""
  129 + """Call OleFileIO.open."""
131 130 #super(OleRecordFile, self).open(filename, *args, **kwargs)
132 131 OleFileIO.open(self, filename, *args, **kwargs)
133   - self.is_encrypted = oleid.OleID(self).check_encrypted().value
134 132  
135 133 @classmethod
136 134 def stream_class_for_name(cls, stream_name):
... ... @@ -163,8 +161,7 @@ class OleRecordFile(olefile.OleFileIO):
163 161 stream = clz(self._open(direntry.isectStart, direntry.size),
164 162 direntry.size,
165 163 None if is_orphan else direntry.name,
166   - direntry.entry_type,
167   - self.is_encrypted)
  164 + direntry.entry_type)
168 165 yield stream
169 166 stream.close()
170 167  
... ... @@ -177,14 +174,13 @@ class OleRecordStream(object):
177 174 abstract base class
178 175 """
179 176  
180   - def __init__(self, stream, size, name, stream_type, is_encrypted=False):
  177 + def __init__(self, stream, size, name, stream_type):
181 178 self.stream = stream
182 179 self.size = size
183 180 self.name = name
184 181 if stream_type not in ENTRY_TYPE2STR:
185 182 raise ValueError('Unknown stream type: {0}'.format(stream_type))
186 183 self.stream_type = stream_type
187   - self.is_encrypted = is_encrypted
188 184  
189 185 def read_record_head(self):
190 186 """ read first few bytes of record to determine size and type
... ... @@ -213,9 +209,6 @@ class OleRecordStream(object):
213 209  
214 210 Stream must be positioned at start of records (e.g. start of stream).
215 211 """
216   - if self.is_encrypted:
217   - raise FileIsEncryptedError()
218   -
219 212 while True:
220 213 # unpacking as in olevba._extract_vba
221 214 pos = self.stream.tell()
... ...
oletools/xls_parser.py
... ... @@ -101,7 +101,7 @@ def read_unicode(data, start_idx, n_chars):
101 101 """ read a unicode string from a XLUnicodeStringNoCch structure """
102 102 # first bit 0x0 --> only low-bytes are saved, all high bytes are 0
103 103 # first bit 0x1 --> 2 bytes per character
104   - low_bytes_only = (ord(data[start_idx]) == 0)
  104 + low_bytes_only = (ord(data[start_idx:start_idx+1]) == 0)
105 105 if low_bytes_only:
106 106 end_idx = start_idx + 1 + n_chars
107 107 return data[start_idx+1:end_idx].decode('ascii'), end_idx
... ... @@ -349,6 +349,7 @@ class XlsRecordSupBook(XlsRecord):
349 349 LINK_TYPE_EXTERNAL = 'external workbook'
350 350  
351 351 def finish_constructing(self, _):
  352 + """Finish constructing this record; called at end of constructor."""
352 353 # set defaults
353 354 self.ctab = None
354 355 self.cch = None
... ...
setup.py
... ... @@ -28,6 +28,7 @@ to install this package.
28 28 # 2018-09-15 PL: - easygui is now a dependency
29 29 # 2018-09-22 PL: - colorclass is now a dependency
30 30 # 2018-10-27 PL: - fixed issue #359 (bug when importing log_helper)
  31 +# 2019-02-26 CH: - add optional dependency msoffcrypto for decryption
31 32  
32 33 #--- TODO ---------------------------------------------------------------------
33 34  
... ... @@ -317,6 +318,10 @@ def main():
317 318 "easygui",
318 319 'colorclass',
319 320 ],
  321 + extras_require = {
  322 + # msoffcrypto-tools by nolze can be used to decrypt some office files
  323 + 'decrypt': ['msoffcrypto']
  324 + }
320 325 )
321 326  
322 327  
... ...
tests/common/log_helper/test_log_helper.py
... ... @@ -13,9 +13,11 @@ from tests.common.log_helper import log_helper_test_main
13 13 from tests.common.log_helper import log_helper_test_imported
14 14 from os.path import dirname, join, relpath, abspath
15 15  
  16 +from tests.test_utils import PROJECT_ROOT
  17 +
16 18 # this is the common base of "tests" and "oletools" dirs
17   -ROOT_DIRECTORY = abspath(join(__file__, '..', '..', '..', '..'))
18   -TEST_FILE = relpath(join(dirname(__file__), 'log_helper_test_main.py'), ROOT_DIRECTORY)
  19 +TEST_FILE = relpath(join(dirname(abspath(__file__)), 'log_helper_test_main.py'),
  20 + PROJECT_ROOT)
19 21 PYTHON_EXECUTABLE = sys.executable
20 22  
21 23 MAIN_LOG_MESSAGES = [
... ... @@ -90,9 +92,9 @@ class TestLogHelper(unittest.TestCase):
90 92 child = subprocess.Popen(
91 93 [PYTHON_EXECUTABLE, TEST_FILE] + args,
92 94 shell=False,
93   - env={'PYTHONPATH': ROOT_DIRECTORY},
  95 + env={'PYTHONPATH': PROJECT_ROOT},
94 96 universal_newlines=True,
95   - cwd=ROOT_DIRECTORY,
  97 + cwd=PROJECT_ROOT,
96 98 stdin=None,
97 99 stdout=subprocess.PIPE,
98 100 stderr=subprocess.PIPE
... ...
tests/msodde/test_basic.py
... ... @@ -123,7 +123,7 @@ class TestDdeLinks(unittest.TestCase):
123 123  
124 124 def test_excel(self):
125 125 """ check that dde links are found in excel 2007+ files """
126   - expect = ['DDE-Link cmd /c calc.exe', ]
  126 + expect = ['cmd /c calc.exe', ]
127 127 for extn in 'xlsx', 'xlsm', 'xlsb':
128 128 output = msodde.process_file(
129 129 join(BASE_DIR, 'msodde', 'dde-test.' + extn), msodde.FIELD_FILTER_BLACKLIST)
... ...
tests/msodde/test_crypto.py 0 → 100644
  1 +"""Check decryption of files from msodde works."""
  2 +
  3 +import sys
  4 +import unittest
  5 +from os.path import join as pjoin
  6 +
  7 +from tests.test_utils import DATA_BASE_DIR
  8 +
  9 +from oletools import crypto
  10 +from oletools import msodde
  11 +
  12 +
@unittest.skipUnless(crypto.check_msoffcrypto(),
                     'Module msoffcrypto not installed for python{}.{}'
                     .format(sys.version_info.major, sys.version_info.minor))
class MsoddeCryptoTest(unittest.TestCase):
    """Check that msodde transparently decrypts files before analysis."""

    def test_standard_password(self):
        """Find the dde-link in samples encrypted with standard password."""
        crypt_dir = pjoin(DATA_BASE_DIR, 'encrypted')
        for suffix in ('xls', 'xlsx', 'xlsm', 'xlsb'):
            sample_name = 'dde-test-encrypt-standardpassword.' + suffix
            found = msodde.process_maybe_encrypted(
                pjoin(crypt_dir, sample_name))
            failure_text = 'Unexpected output {!r} for {}'.format(found,
                                                                  suffix)
            self.assertEqual(found, 'cmd /c calc.exe', msg=failure_text)
  27 +
  28 +
  29 +if __name__ == '__main__':
  30 + unittest.main()
... ...
tests/olevba/test_basic.py
... ... @@ -28,7 +28,15 @@ class TestOlevbaBasic(unittest.TestCase):
28 28 CRYPT_DIR = join(DATA_BASE_DIR, 'encrypted')
29 29 CRYPT_RETURN_CODE = 9
30 30 ADD_ARGS = [], ['-d', ], ['-a', ], ['-j', ], ['-t', ]
  31 + EXCEPTIONS = ['autostart-encrypt-standardpassword.xlsm', # These ...
  32 + 'autostart-encrypt-standardpassword.xlsb', # files ...
  33 + 'dde-test-encrypt-standardpassword.xls', # are ...
  34 + 'dde-test-encrypt-standardpassword.xlsx', # decrypted
  35 + 'dde-test-encrypt-standardpassword.xlsm', # per ...
  36 + 'dde-test-encrypt-standardpassword.xlsb'] # default.
31 37 for filename in os.listdir(CRYPT_DIR):
  38 + if filename in EXCEPTIONS:
  39 + continue
32 40 full_name = join(CRYPT_DIR, filename)
33 41 for args in ADD_ARGS:
34 42 try:
... ...
tests/olevba/test_crypto.py 0 → 100644
  1 +"""Check decryption of files from olevba works."""
  2 +
  3 +import sys
  4 +import unittest
  5 +import os
  6 +from os.path import join as pjoin
  7 +from subprocess import check_output, CalledProcessError
  8 +import json
  9 +from collections import OrderedDict
  10 +
  11 +from tests.test_utils import DATA_BASE_DIR, SOURCE_BASE_DIR
  12 +
  13 +from oletools import crypto
  14 +
  15 +
@unittest.skipIf(not crypto.check_msoffcrypto(),
                 'Module msoffcrypto not installed for python{}.{}'
                 .format(sys.version_info.major, sys.version_info.minor))
class OlevbaCryptoWriteProtectTest(unittest.TestCase):
    """
    Test documents that are 'write-protected' through encryption.

    Excel has a way to 'write-protect' documents by encrypting them with a
    hard-coded standard password. When looking at the file-structure you see
    an OLE-file with streams `EncryptedPackage`, `StrongEncryptionSpace`, and
    `EncryptionInfo`. Contained in the first is the actual file. When opening
    such a file in excel, it is decrypted without the user noticing.

    Olevba should detect such encryption, try to decrypt with the standard
    password and look for VBA code in the decrypted file.

    All these tests are skipped if the module `msoffcrypto-tool` is not
    installed.
    """
    def test_autostart(self):
        """Check that autostart macro is found in xls[mb] sample file."""
        # create a PYTHONPATH environment var to prefer our olevba; work on
        # a copy so the environment of the test process itself is not changed
        env = os.environ.copy()
        old_python_path = env.get('PYTHONPATH')
        if old_python_path is None:
            env['PYTHONPATH'] = SOURCE_BASE_DIR
        else:
            env['PYTHONPATH'] = SOURCE_BASE_DIR + os.pathsep + old_python_path

        for suffix in 'xlsm', 'xlsb':
            example_file = pjoin(
                DATA_BASE_DIR, 'encrypted',
                'autostart-encrypt-standardpassword.' + suffix)
            try:
                output = check_output([sys.executable, '-m', 'olevba', '-j',
                                       example_file],
                                      universal_newlines=True, env=env)
            except CalledProcessError as err:
                print(err.output)
                raise
            data = json.loads(output, object_pairs_hook=OrderedDict)
            # debug: json.dump(data, sys.stdout, indent=4)

            # expected entries: meta info, encrypted outer file, decrypted
            # inner file, meta info
            self.assertEqual(len(data), 4)
            self.assertIn('script_name', data[0])
            self.assertIn('version', data[0])
            self.assertEqual(data[0]['type'], 'MetaInformation')
            self.assertIn('return_code', data[-1])
            self.assertEqual(data[-1]['type'], 'MetaInformation')

            # the encrypted OLE container itself shows no macros
            self.assertEqual(data[1]['container'], None)
            self.assertEqual(data[1]['file'], example_file)
            self.assertEqual(data[1]['analysis'], None)
            self.assertEqual(data[1]['macros'], [])
            self.assertEqual(data[1]['type'], 'OLE')

            # the decrypted file is reported with the original as container
            self.assertEqual(data[2]['container'], example_file)
            self.assertNotEqual(data[2]['file'], example_file)
            self.assertEqual(data[2]['type'], "OpenXML")
            analysis = data[2]['analysis']
            self.assertEqual(analysis[0]['type'], 'AutoExec')
            self.assertEqual(analysis[0]['keyword'], 'Auto_Open')
            macros = data[2]['macros']
            self.assertEqual(macros[0]['vba_filename'], 'Modul1.bas')
            self.assertIn('Sub Auto_Open()', macros[0]['code'])
  78 +
  79 +
  80 +if __name__ == '__main__':
  81 + unittest.main()
... ...
tests/ppt_parser/test_basic.py
... ... @@ -16,7 +16,7 @@ class TestBasic(unittest.TestCase):
16 16  
17 17 def test_is_ppt(self):
18 18 """ test ppt_record_parser.is_ppt(filename) """
19   - exceptions = []
  19 + exceptions = ['encrypted.ppt', ] # actually is ppt but embedded
20 20 for base_dir, _, files in os.walk(DATA_BASE_DIR):
21 21 for filename in files:
22 22 if filename in exceptions:
... ...
tests/test-data/encrypted/autostart-encrypt-standardpassword.xlsb 0 → 100755
No preview for this file type
tests/test-data/encrypted/autostart-encrypt-standardpassword.xlsm 0 → 100755
No preview for this file type
tests/test-data/encrypted/dde-test-encrypt-standardpassword.xls 0 → 100755
No preview for this file type
tests/test-data/encrypted/dde-test-encrypt-standardpassword.xlsb 0 → 100755
No preview for this file type
tests/test-data/encrypted/dde-test-encrypt-standardpassword.xlsm 0 → 100755
No preview for this file type
tests/test-data/encrypted/dde-test-encrypt-standardpassword.xlsx 0 → 100755
No preview for this file type
tests/test_utils/__init__.py
1   -from os.path import dirname, join
  1 +from os.path import dirname, join, abspath
  2 +
  3 +# Base dir of project, contains subdirs "tests" and "oletools" and README.md
  4 +PROJECT_ROOT = dirname(dirname(dirname(abspath(__file__))))
2 5  
3 6 # Directory with test data, independent of current working directory
4   -DATA_BASE_DIR = join(dirname(dirname(__file__)), 'test-data')
  7 +DATA_BASE_DIR = join(PROJECT_ROOT, 'tests', 'test-data')
  8 +
  9 +# Directory with source code
  10 +SOURCE_BASE_DIR = join(PROJECT_ROOT, 'oletools')
... ...