Commit e8b6dd46b5b6221d8ff6725862f3b14fa15f2128
Committed by
GitHub
Merge pull request #362 from christian-intra2net/encrypt-detect-and-raise
Encrypt detect and raise
Showing
23 changed files
with
621 additions
and
119 deletions
oletools/common/errors.py
0 → 100644
| 1 | +""" | ||
| 2 | +Errors used in several tools to avoid duplication | ||
| 3 | + | ||
| 4 | +.. codeauthor:: Intra2net AG <info@intra2net.com> | ||
| 5 | +""" | ||
| 6 | + | ||
| 7 | +class FileIsEncryptedError(ValueError): | ||
| 8 | + """Exception raised if a file is encrypted and the tool cannot deal with it.""" | ||
| 9 | + # see also: same class in olevba[3] and record_base | ||
| 10 | + def __init__(self, filename=None): | ||
| 11 | + super(FileIsEncryptedError, self).__init__( | ||
| 12 | + 'Office file {}is encrypted, not yet supported' | ||
| 13 | + .format('' if filename is None else filename + ' ')) |
oletools/msodde.py
| @@ -11,6 +11,7 @@ Supported formats: | @@ -11,6 +11,7 @@ Supported formats: | ||
| 11 | - RTF | 11 | - RTF |
| 12 | - CSV (exported from / imported into Excel) | 12 | - CSV (exported from / imported into Excel) |
| 13 | - XML (exported from Word 2003, Word 2007+, Excel 2003, (Excel 2007+?) | 13 | - XML (exported from Word 2003, Word 2007+, Excel 2003, (Excel 2007+?) |
| 14 | +- raises an error if run with files encrypted using MS Crypto API RC4 | ||
| 14 | 15 | ||
| 15 | Author: Philippe Lagadec - http://www.decalage.info | 16 | Author: Philippe Lagadec - http://www.decalage.info |
| 16 | License: BSD, see source code or documentation | 17 | License: BSD, see source code or documentation |
| @@ -61,7 +62,9 @@ import olefile | @@ -61,7 +62,9 @@ import olefile | ||
| 61 | from oletools import ooxml | 62 | from oletools import ooxml |
| 62 | from oletools import xls_parser | 63 | from oletools import xls_parser |
| 63 | from oletools import rtfobj | 64 | from oletools import rtfobj |
| 65 | +from oletools import oleid | ||
| 64 | from oletools.common.log_helper import log_helper | 66 | from oletools.common.log_helper import log_helper |
| 67 | +from oletools.common.errors import FileIsEncryptedError | ||
| 65 | 68 | ||
| 66 | # ----------------------------------------------------------------------------- | 69 | # ----------------------------------------------------------------------------- |
| 67 | # CHANGELOG: | 70 | # CHANGELOG: |
| @@ -84,6 +87,7 @@ from oletools.common.log_helper import log_helper | @@ -84,6 +87,7 @@ from oletools.common.log_helper import log_helper | ||
| 84 | # 2018-01-10 CH: - add single-xml files (Word 2003/2007+ / Excel 2003) | 87 | # 2018-01-10 CH: - add single-xml files (Word 2003/2007+ / Excel 2003) |
| 85 | # 2018-03-21 CH: - added detection for various CSV formulas (issue #259) | 88 | # 2018-03-21 CH: - added detection for various CSV formulas (issue #259) |
| 86 | # 2018-09-11 v0.54 PL: - olefile is now a dependency | 89 | # 2018-09-11 v0.54 PL: - olefile is now a dependency |
| 90 | +# 2018-10-25 CH: - detect encryption and raise error if detected | ||
| 87 | 91 | ||
| 88 | __version__ = '0.54dev1' | 92 | __version__ = '0.54dev1' |
| 89 | 93 | ||
| @@ -438,17 +442,18 @@ def process_doc_stream(stream): | @@ -438,17 +442,18 @@ def process_doc_stream(stream): | ||
| 438 | return result_parts | 442 | return result_parts |
| 439 | 443 | ||
| 440 | 444 | ||
| 441 | -def process_doc(filepath): | 445 | +def process_doc(ole): |
| 442 | """ | 446 | """ |
| 443 | find dde links in word ole (.doc/.dot) file | 447 | find dde links in word ole (.doc/.dot) file |
| 444 | 448 | ||
| 449 | + Checks whether the file is ppt and returns empty immediately in that case | ||
| 450 | + (ppt files cannot contain DDE-links to my knowledge) | ||
| 451 | + | ||
| 445 | like process_xml, returns a concatenated unicode string of dde links or | 452 | like process_xml, returns a concatenated unicode string of dde links or |
| 446 | empty if none were found. dde-links will still begin with the dde[auto] key | 453 | empty if none were found. dde-links will still begin with the dde[auto] key |
| 447 | word (possibly after some whitespace) | 454 | word (possibly after some whitespace) |
| 448 | """ | 455 | """ |
| 449 | logger.debug('process_doc') | 456 | logger.debug('process_doc') |
| 450 | - ole = olefile.OleFileIO(filepath, path_encoding=None) | ||
| 451 | - | ||
| 452 | links = [] | 457 | links = [] |
| 453 | for sid, direntry in enumerate(ole.direntries): | 458 | for sid, direntry in enumerate(ole.direntries): |
| 454 | is_orphan = direntry is None | 459 | is_orphan = direntry is None |
| @@ -703,8 +708,8 @@ def process_xlsx(filepath): | @@ -703,8 +708,8 @@ def process_xlsx(filepath): | ||
| 703 | log_func = logger.debug | 708 | log_func = logger.debug |
| 704 | else: # default | 709 | else: # default |
| 705 | log_func = logger.info | 710 | log_func = logger.info |
| 706 | - log_func('Failed to parse {0} of content type {1}' | ||
| 707 | - .format(subfile, content_type)) | 711 | + log_func('Failed to parse {0} of content type {1} ("{2}")' |
| 712 | + .format(subfile, content_type, str(exc))) | ||
| 708 | # in any case: continue with next | 713 | # in any case: continue with next |
| 709 | 714 | ||
| 710 | return u'\n'.join(dde_links) | 715 | return u'\n'.join(dde_links) |
| @@ -886,9 +891,20 @@ def process_file(filepath, field_filter_mode=None): | @@ -886,9 +891,20 @@ def process_file(filepath, field_filter_mode=None): | ||
| 886 | if xls_parser.is_xls(filepath): | 891 | if xls_parser.is_xls(filepath): |
| 887 | logger.debug('Process file as excel 2003 (xls)') | 892 | logger.debug('Process file as excel 2003 (xls)') |
| 888 | return process_xls(filepath) | 893 | return process_xls(filepath) |
| 894 | + | ||
| 895 | + # encrypted files also look like ole, even if office 2007+ (xml-based) | ||
| 896 | + # so check for encryption, first | ||
| 897 | + ole = olefile.OleFileIO(filepath, path_encoding=None) | ||
| 898 | + oid = oleid.OleID(ole) | ||
| 899 | + if oid.check_encrypted().value: | ||
| 900 | + log.debug('is encrypted - raise error') | ||
| 901 | + raise FileIsEncryptedError(filepath) | ||
| 902 | + elif oid.check_powerpoint().value: | ||
| 903 | + log.debug('is ppt - cannot have DDE') | ||
| 904 | + return u'' | ||
| 889 | else: | 905 | else: |
| 890 | logger.debug('Process file as word 2003 (doc)') | 906 | logger.debug('Process file as word 2003 (doc)') |
| 891 | - return process_doc(filepath) | 907 | + return process_doc(ole) |
| 892 | 908 | ||
| 893 | with open(filepath, 'rb') as file_handle: | 909 | with open(filepath, 'rb') as file_handle: |
| 894 | if file_handle.read(4) == RTF_START: | 910 | if file_handle.read(4) == RTF_START: |
oletools/oleid.py
| @@ -6,9 +6,8 @@ oleid is a script to analyze OLE files such as MS Office documents (e.g. Word, | @@ -6,9 +6,8 @@ oleid is a script to analyze OLE files such as MS Office documents (e.g. Word, | ||
| 6 | Excel), to detect specific characteristics that could potentially indicate that | 6 | Excel), to detect specific characteristics that could potentially indicate that |
| 7 | the file is suspicious or malicious, in terms of security (e.g. malware). | 7 | the file is suspicious or malicious, in terms of security (e.g. malware). |
| 8 | For example it can detect VBA macros, embedded Flash objects, fragmentation. | 8 | For example it can detect VBA macros, embedded Flash objects, fragmentation. |
| 9 | -The results can be displayed or returned as XML for further processing. | ||
| 10 | - | ||
| 11 | -Usage: oleid.py <file> | 9 | +The results are displayed as an ASCII table (but could be returned or printed in |
| 10 | +other formats like CSV, XML or JSON in the future). | ||
| 12 | 11 | ||
| 13 | oleid project website: http://www.decalage.info/python/oleid | 12 | oleid project website: http://www.decalage.info/python/oleid |
| 14 | 13 | ||
| @@ -21,8 +20,8 @@ http://www.decalage.info/python/oletools | @@ -21,8 +20,8 @@ http://www.decalage.info/python/oletools | ||
| 21 | # oleid is copyright (c) 2012-2018, Philippe Lagadec (http://www.decalage.info) | 20 | # oleid is copyright (c) 2012-2018, Philippe Lagadec (http://www.decalage.info) |
| 22 | # All rights reserved. | 21 | # All rights reserved. |
| 23 | # | 22 | # |
| 24 | -# Redistribution and use in source and binary forms, with or without modification, | ||
| 25 | -# are permitted provided that the following conditions are met: | 23 | +# Redistribution and use in source and binary forms, with or without |
| 24 | +# modification, are permitted provided that the following conditions are met: | ||
| 26 | # | 25 | # |
| 27 | # * Redistributions of source code must retain the above copyright notice, this | 26 | # * Redistributions of source code must retain the above copyright notice, this |
| 28 | # list of conditions and the following disclaimer. | 27 | # list of conditions and the following disclaimer. |
| @@ -30,16 +29,17 @@ http://www.decalage.info/python/oletools | @@ -30,16 +29,17 @@ http://www.decalage.info/python/oletools | ||
| 30 | # this list of conditions and the following disclaimer in the documentation | 29 | # this list of conditions and the following disclaimer in the documentation |
| 31 | # and/or other materials provided with the distribution. | 30 | # and/or other materials provided with the distribution. |
| 32 | # | 31 | # |
| 33 | -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND | ||
| 34 | -# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | ||
| 35 | -# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | ||
| 36 | -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE | ||
| 37 | -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | ||
| 38 | -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | ||
| 39 | -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | ||
| 40 | -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | ||
| 41 | -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
| 42 | -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 32 | +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
| 33 | +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||
| 34 | +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | ||
| 35 | +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE | ||
| 36 | +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | ||
| 37 | +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | ||
| 38 | +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | ||
| 39 | +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | ||
| 40 | +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | ||
| 41 | +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | ||
| 42 | +# POSSIBILITY OF SUCH DAMAGE. | ||
| 43 | 43 | ||
| 44 | # To improve Python 2+3 compatibility: | 44 | # To improve Python 2+3 compatibility: |
| 45 | from __future__ import print_function | 45 | from __future__ import print_function |
| @@ -56,6 +56,8 @@ from __future__ import print_function | @@ -56,6 +56,8 @@ from __future__ import print_function | ||
| 56 | # 2017-04-26 PL: - fixed absolute imports (issue #141) | 56 | # 2017-04-26 PL: - fixed absolute imports (issue #141) |
| 57 | # 2017-09-01 SA: - detect OpenXML encryption | 57 | # 2017-09-01 SA: - detect OpenXML encryption |
| 58 | # 2018-09-11 v0.54 PL: - olefile is now a dependency | 58 | # 2018-09-11 v0.54 PL: - olefile is now a dependency |
| 59 | +# 2018-10-19 CH: - accept olefile as well as filename, return Indicators, | ||
| 60 | +# improve encryption detection for ppt | ||
| 59 | 61 | ||
| 60 | __version__ = '0.54dev1' | 62 | __version__ = '0.54dev1' |
| 61 | 63 | ||
| @@ -78,28 +80,27 @@ __version__ = '0.54dev1' | @@ -78,28 +80,27 @@ __version__ = '0.54dev1' | ||
| 78 | 80 | ||
| 79 | #=== IMPORTS ================================================================= | 81 | #=== IMPORTS ================================================================= |
| 80 | 82 | ||
| 81 | -import optparse, sys, os, re, zlib, struct | 83 | +import argparse, sys, re, zlib, struct |
| 84 | +from os.path import dirname, abspath | ||
| 82 | 85 | ||
| 83 | -# IMPORTANT: it should be possible to run oletools directly as scripts | ||
| 84 | -# in any directory without installing them with pip or setup.py. | ||
| 85 | -# In that case, relative imports are NOT usable. | ||
| 86 | -# And to enable Python 2+3 compatibility, we need to use absolute imports, | ||
| 87 | -# so we add the oletools parent folder to sys.path (absolute+normalized path): | ||
| 88 | -_thismodule_dir = os.path.normpath(os.path.abspath(os.path.dirname(__file__))) | ||
| 89 | -# print('_thismodule_dir = %r' % _thismodule_dir) | ||
| 90 | -_parent_dir = os.path.normpath(os.path.join(_thismodule_dir, '..')) | ||
| 91 | -# print('_parent_dir = %r' % _thirdparty_dir) | ||
| 92 | -if not _parent_dir in sys.path: | ||
| 93 | - sys.path.insert(0, _parent_dir) | 86 | +# little hack to allow absolute imports even if oletools is not installed |
| 87 | +# (required to run oletools directly as scripts in any directory). | ||
| 88 | +try: | ||
| 89 | + from oletools.thirdparty import prettytable | ||
| 90 | +except ImportError: | ||
| 91 | + PARENT_DIR = dirname(dirname(abspath(__file__))) | ||
| 92 | + if PARENT_DIR not in sys.path: | ||
| 93 | + sys.path.insert(0, PARENT_DIR) | ||
| 94 | + del PARENT_DIR | ||
| 95 | + from oletools.thirdparty import prettytable | ||
| 94 | 96 | ||
| 95 | import olefile | 97 | import olefile |
| 96 | -from oletools.thirdparty.prettytable import prettytable | ||
| 97 | 98 | ||
| 98 | 99 | ||
| 99 | 100 | ||
| 100 | #=== FUNCTIONS =============================================================== | 101 | #=== FUNCTIONS =============================================================== |
| 101 | 102 | ||
| 102 | -def detect_flash (data): | 103 | +def detect_flash(data): |
| 103 | """ | 104 | """ |
| 104 | Detect Flash objects (SWF files) within a binary string of data | 105 | Detect Flash objects (SWF files) within a binary string of data |
| 105 | return a list of (start_index, length, compressed) tuples, or [] if nothing | 106 | return a list of (start_index, length, compressed) tuples, or [] if nothing |
| @@ -141,7 +142,7 @@ def detect_flash (data): | @@ -141,7 +142,7 @@ def detect_flash (data): | ||
| 141 | compressed_data = swf[8:] | 142 | compressed_data = swf[8:] |
| 142 | try: | 143 | try: |
| 143 | zlib.decompress(compressed_data) | 144 | zlib.decompress(compressed_data) |
| 144 | - except: | 145 | + except Exception: |
| 145 | continue | 146 | continue |
| 146 | # else we don't check anything at this stage, we only assume it is a | 147 | # else we don't check anything at this stage, we only assume it is a |
| 147 | # valid SWF. So there might be false positives for uncompressed SWF. | 148 | # valid SWF. So there might be false positives for uncompressed SWF. |
| @@ -152,9 +153,15 @@ def detect_flash (data): | @@ -152,9 +153,15 @@ def detect_flash (data): | ||
| 152 | 153 | ||
| 153 | #=== CLASSES ================================================================= | 154 | #=== CLASSES ================================================================= |
| 154 | 155 | ||
| 155 | -class Indicator (object): | 156 | +class Indicator(object): |
| 157 | + """ | ||
| 158 | + Piece of information of an :py:class:`OleID` object. | ||
| 159 | + | ||
| 160 | + Contains an ID, value, type, name and description. No other functionality. | ||
| 161 | + """ | ||
| 156 | 162 | ||
| 157 | - def __init__(self, _id, value=None, _type=bool, name=None, description=None): | 163 | + def __init__(self, _id, value=None, _type=bool, name=None, |
| 164 | + description=None): | ||
| 158 | self.id = _id | 165 | self.id = _id |
| 159 | self.value = value | 166 | self.value = value |
| 160 | self.type = _type | 167 | self.type = _type |
| @@ -164,21 +171,55 @@ class Indicator (object): | @@ -164,21 +171,55 @@ class Indicator (object): | ||
| 164 | self.description = description | 171 | self.description = description |
| 165 | 172 | ||
| 166 | 173 | ||
| 167 | -class OleID: | 174 | +class OleID(object): |
| 175 | + """ | ||
| 176 | + Summary of information about an OLE file | ||
| 168 | 177 | ||
| 169 | - def __init__(self, filename): | ||
| 170 | - self.filename = filename | 178 | + Call :py:meth:`OleID.check` to gather all info on a given file or run one |
| 179 | + of the `check_` functions to just get a specific piece of info. | ||
| 180 | + """ | ||
| 181 | + | ||
| 182 | + def __init__(self, input_file): | ||
| 183 | + """ | ||
| 184 | + Create an OleID object | ||
| 185 | + | ||
| 186 | + This does not run any checks yet nor open the file. | ||
| 187 | + | ||
| 188 | + Can either give just a filename (as str), so OleID will check whether | ||
| 189 | + that is a valid OLE file and create a :py:class:`olefile.OleFileIO` | ||
| 190 | + object for it. Or you can give an already opened | ||
| 191 | + :py:class:`olefile.OleFileIO` as argument to avoid re-opening (e.g. if | ||
| 192 | + called from other oletools). | ||
| 193 | + | ||
| 194 | + If filename is given, only :py:meth:`OleID.check` opens the file. Other | ||
| 195 | + functions will return None | ||
| 196 | + """ | ||
| 197 | + if isinstance(input_file, olefile.OleFileIO): | ||
| 198 | + self.ole = input_file | ||
| 199 | + self.filename = None | ||
| 200 | + else: | ||
| 201 | + self.filename = input_file | ||
| 202 | + self.ole = None | ||
| 171 | self.indicators = [] | 203 | self.indicators = [] |
| 204 | + self.suminfo_data = None | ||
| 172 | 205 | ||
| 173 | def check(self): | 206 | def check(self): |
| 207 | + """ | ||
| 208 | + Open file and run all checks on it. | ||
| 209 | + | ||
| 210 | + :returns: list of all :py:class:`Indicator`s created | ||
| 211 | + """ | ||
| 174 | # check if it is actually an OLE file: | 212 | # check if it is actually an OLE file: |
| 175 | oleformat = Indicator('ole_format', True, name='OLE format') | 213 | oleformat = Indicator('ole_format', True, name='OLE format') |
| 176 | self.indicators.append(oleformat) | 214 | self.indicators.append(oleformat) |
| 177 | - if not olefile.isOleFile(self.filename): | 215 | + if self.ole: |
| 216 | + oleformat.value = True | ||
| 217 | + elif not olefile.isOleFile(self.filename): | ||
| 178 | oleformat.value = False | 218 | oleformat.value = False |
| 179 | return self.indicators | 219 | return self.indicators |
| 180 | - # parse file: | ||
| 181 | - self.ole = olefile.OleFileIO(self.filename) | 220 | + else: |
| 221 | + # parse file: | ||
| 222 | + self.ole = olefile.OleFileIO(self.filename) | ||
| 182 | # checks: | 223 | # checks: |
| 183 | self.check_properties() | 224 | self.check_properties() |
| 184 | self.check_encrypted() | 225 | self.check_encrypted() |
| @@ -186,143 +227,274 @@ class OleID: | @@ -186,143 +227,274 @@ class OleID: | ||
| 186 | self.check_excel() | 227 | self.check_excel() |
| 187 | self.check_powerpoint() | 228 | self.check_powerpoint() |
| 188 | self.check_visio() | 229 | self.check_visio() |
| 189 | - self.check_ObjectPool() | 230 | + self.check_object_pool() |
| 190 | self.check_flash() | 231 | self.check_flash() |
| 191 | self.ole.close() | 232 | self.ole.close() |
| 192 | return self.indicators | 233 | return self.indicators |
| 193 | 234 | ||
| 194 | - def check_properties (self): | ||
| 195 | - suminfo = Indicator('has_suminfo', False, name='Has SummaryInformation stream') | 235 | + def check_properties(self): |
| 236 | + """ | ||
| 237 | + Read summary information required for other check_* functions | ||
| 238 | + | ||
| 239 | + :returns: 2 :py:class:`Indicator`s (for presence of summary info and | ||
| 240 | + application name) or (None, None) if file was not opened | ||
| 241 | + """ | ||
| 242 | + suminfo = Indicator('has_suminfo', False, | ||
| 243 | + name='Has SummaryInformation stream') | ||
| 196 | self.indicators.append(suminfo) | 244 | self.indicators.append(suminfo) |
| 197 | - appname = Indicator('appname', 'unknown', _type=str, name='Application name') | 245 | + appname = Indicator('appname', 'unknown', _type=str, |
| 246 | + name='Application name') | ||
| 198 | self.indicators.append(appname) | 247 | self.indicators.append(appname) |
| 199 | - self.suminfo = {} | ||
| 200 | - # check stream SummaryInformation | 248 | + if not self.ole: |
| 249 | + return None, None | ||
| 250 | + self.suminfo_data = {} | ||
| 251 | + # check stream SummaryInformation (not present e.g. in encrypted ppt) | ||
| 201 | if self.ole.exists("\x05SummaryInformation"): | 252 | if self.ole.exists("\x05SummaryInformation"): |
| 202 | suminfo.value = True | 253 | suminfo.value = True |
| 203 | - self.suminfo = self.ole.getproperties("\x05SummaryInformation") | 254 | + self.suminfo_data = self.ole.getproperties("\x05SummaryInformation") |
| 204 | # check application name: | 255 | # check application name: |
| 205 | - appname.value = self.suminfo.get(0x12, 'unknown') | ||
| 206 | - | ||
| 207 | - def check_encrypted (self): | 256 | + appname.value = self.suminfo_data.get(0x12, 'unknown') |
| 257 | + return suminfo, appname | ||
| 258 | + | ||
| 259 | + def get_indicator(self, indicator_id): | ||
| 260 | + """Helper function: returns an indicator if present (or None)""" | ||
| 261 | + result = [indicator for indicator in self.indicators | ||
| 262 | + if indicator.id == indicator_id] | ||
| 263 | + if result: | ||
| 264 | + return result[0] | ||
| 265 | + else: | ||
| 266 | + return None | ||
| 267 | + | ||
| 268 | + def check_encrypted(self): | ||
| 269 | + """ | ||
| 270 | + Check whether this file is encrypted. | ||
| 271 | + | ||
| 272 | + Might call check_properties. | ||
| 273 | + | ||
| 274 | + :returns: :py:class:`Indicator` for encryption or None if file was not | ||
| 275 | + opened | ||
| 276 | + """ | ||
| 208 | # we keep the pointer to the indicator, can be modified by other checks: | 277 | # we keep the pointer to the indicator, can be modified by other checks: |
| 209 | - self.encrypted = Indicator('encrypted', False, name='Encrypted') | ||
| 210 | - self.indicators.append(self.encrypted) | 278 | + encrypted = Indicator('encrypted', False, name='Encrypted') |
| 279 | + self.indicators.append(encrypted) | ||
| 280 | + if not self.ole: | ||
| 281 | + return None | ||
| 211 | # check if bit 1 of security field = 1: | 282 | # check if bit 1 of security field = 1: |
| 212 | # (this field may be missing for Powerpoint2000, for example) | 283 | # (this field may be missing for Powerpoint2000, for example) |
| 213 | - if 0x13 in self.suminfo: | ||
| 214 | - if self.suminfo[0x13] & 1: | ||
| 215 | - self.encrypted.value = True | 284 | + if self.suminfo_data is None: |
| 285 | + self.check_properties() | ||
| 286 | + if 0x13 in self.suminfo_data: | ||
| 287 | + if self.suminfo_data[0x13] & 1: | ||
| 288 | + encrypted.value = True | ||
| 216 | # check if this is an OpenXML encrypted file | 289 | # check if this is an OpenXML encrypted file |
| 217 | elif self.ole.exists('EncryptionInfo'): | 290 | elif self.ole.exists('EncryptionInfo'): |
| 218 | - self.encrypted.value = True | ||
| 219 | - | ||
| 220 | - def check_word (self): | ||
| 221 | - word = Indicator('word', False, name='Word Document', | ||
| 222 | - description='Contains a WordDocument stream, very likely to be a Microsoft Word Document.') | 291 | + encrypted.value = True |
| 292 | + # or an encrypted ppt file | ||
| 293 | + if self.ole.exists('EncryptedSummary') and \ | ||
| 294 | + not self.ole.exists('SummaryInformation'): | ||
| 295 | + encrypted.value = True | ||
| 296 | + return encrypted | ||
| 297 | + | ||
| 298 | + def check_word(self): | ||
| 299 | + """ | ||
| 300 | + Check whether this file is a word document | ||
| 301 | + | ||
| 302 | + If this finds evidence of encryption, will correct/add encryption | ||
| 303 | + indicator. | ||
| 304 | + | ||
| 305 | + :returns: 2 :py:class:`Indicator`s (for word and vba_macro) or (None, None) if | ||
| 306 | + file was not opened | ||
| 307 | + """ | ||
| 308 | + word = Indicator( | ||
| 309 | + 'word', False, name='Word Document', | ||
| 310 | + description='Contains a WordDocument stream, very likely to be a ' | ||
| 311 | + 'Microsoft Word Document.') | ||
| 223 | self.indicators.append(word) | 312 | self.indicators.append(word) |
| 224 | - self.macros = Indicator('vba_macros', False, name='VBA Macros') | ||
| 225 | - self.indicators.append(self.macros) | 313 | + macros = Indicator('vba_macros', False, name='VBA Macros') |
| 314 | + self.indicators.append(macros) | ||
| 315 | + if not self.ole: | ||
| 316 | + return None, None | ||
| 226 | if self.ole.exists('WordDocument'): | 317 | if self.ole.exists('WordDocument'): |
| 227 | word.value = True | 318 | word.value = True |
| 228 | # check for Word-specific encryption flag: | 319 | # check for Word-specific encryption flag: |
| 229 | - s = self.ole.openstream(["WordDocument"]) | ||
| 230 | - # pass header 10 bytes | ||
| 231 | - s.read(10) | ||
| 232 | - # read flag structure: | ||
| 233 | - temp16 = struct.unpack("H", s.read(2))[0] | ||
| 234 | - fEncrypted = (temp16 & 0x0100) >> 8 | ||
| 235 | - if fEncrypted: | ||
| 236 | - self.encrypted.value = True | ||
| 237 | - s.close() | 320 | + stream = None |
| 321 | + try: | ||
| 322 | + stream = self.ole.openstream(["WordDocument"]) | ||
| 323 | + # pass header 10 bytes | ||
| 324 | + stream.read(10) | ||
| 325 | + # read flag structure: | ||
| 326 | + temp16 = struct.unpack("H", stream.read(2))[0] | ||
| 327 | + f_encrypted = (temp16 & 0x0100) >> 8 | ||
| 328 | + if f_encrypted: | ||
| 329 | + # correct encrypted indicator if present or add one | ||
| 330 | + encrypt_ind = self.get_indicator('encrypted') | ||
| 331 | + if encrypt_ind: | ||
| 332 | + encrypt_ind.value = True | ||
| 333 | + else: | ||
| 334 | + self.indicators.append('encrypted', True, name='Encrypted') | ||
| 335 | + except Exception: | ||
| 336 | + raise | ||
| 337 | + finally: | ||
| 338 | + if stream is not None: | ||
| 339 | + stream.close() | ||
| 238 | # check for VBA macros: | 340 | # check for VBA macros: |
| 239 | if self.ole.exists('Macros'): | 341 | if self.ole.exists('Macros'): |
| 240 | - self.macros.value = True | 342 | + macros.value = True |
| 343 | + return word, macros | ||
| 344 | + | ||
| 345 | + def check_excel(self): | ||
| 346 | + """ | ||
| 347 | + Check whether this file is an excel workbook. | ||
| 348 | + | ||
| 349 | + If this finds macros, will add/correct macro indicator. | ||
| 241 | 350 | ||
| 242 | - def check_excel (self): | ||
| 243 | - excel = Indicator('excel', False, name='Excel Workbook', | ||
| 244 | - description='Contains a Workbook or Book stream, very likely to be a Microsoft Excel Workbook.') | 351 | + see also: :py:func:`xls_parser.is_xls` |
| 352 | + | ||
| 353 | + :returns: :py:class:`Indicator` for excel or None if file was | ||
| 354 | + not opened | ||
| 355 | + """ | ||
| 356 | + excel = Indicator( | ||
| 357 | + 'excel', False, name='Excel Workbook', | ||
| 358 | + description='Contains a Workbook or Book stream, very likely to be ' | ||
| 359 | + 'a Microsoft Excel Workbook.') | ||
| 245 | self.indicators.append(excel) | 360 | self.indicators.append(excel) |
| 361 | + if not self.ole: | ||
| 362 | + return None | ||
| 246 | #self.macros = Indicator('vba_macros', False, name='VBA Macros') | 363 | #self.macros = Indicator('vba_macros', False, name='VBA Macros') |
| 247 | #self.indicators.append(self.macros) | 364 | #self.indicators.append(self.macros) |
| 248 | if self.ole.exists('Workbook') or self.ole.exists('Book'): | 365 | if self.ole.exists('Workbook') or self.ole.exists('Book'): |
| 249 | excel.value = True | 366 | excel.value = True |
| 250 | # check for VBA macros: | 367 | # check for VBA macros: |
| 251 | if self.ole.exists('_VBA_PROJECT_CUR'): | 368 | if self.ole.exists('_VBA_PROJECT_CUR'): |
| 252 | - self.macros.value = True | ||
| 253 | - | ||
| 254 | - def check_powerpoint (self): | ||
| 255 | - ppt = Indicator('ppt', False, name='PowerPoint Presentation', | ||
| 256 | - description='Contains a PowerPoint Document stream, very likely to be a Microsoft PowerPoint Presentation.') | 369 | + # correct macro indicator if present or add one |
| 370 | + macro_ind = self.get_indicator('vba_macros') | ||
| 371 | + if macro_ind: | ||
| 372 | + macro_ind.value = True | ||
| 373 | + else: | ||
| 374 | + self.indicators.append('vba_macros', True, | ||
| 375 | + name='VBA Macros') | ||
| 376 | + return excel | ||
| 377 | + | ||
| 378 | + def check_powerpoint(self): | ||
| 379 | + """ | ||
| 380 | + Check whether this file is a powerpoint presentation | ||
| 381 | + | ||
| 382 | + see also: :py:func:`ppt_record_parser.is_ppt` | ||
| 383 | + | ||
| 384 | + :returns: :py:class:`Indicator` for whether this is a powerpoint | ||
| 385 | + presentation or not or None if file was not opened | ||
| 386 | + """ | ||
| 387 | + ppt = Indicator( | ||
| 388 | + 'ppt', False, name='PowerPoint Presentation', | ||
| 389 | + description='Contains a PowerPoint Document stream, very likely to ' | ||
| 390 | + 'be a Microsoft PowerPoint Presentation.') | ||
| 257 | self.indicators.append(ppt) | 391 | self.indicators.append(ppt) |
| 392 | + if not self.ole: | ||
| 393 | + return None | ||
| 258 | if self.ole.exists('PowerPoint Document'): | 394 | if self.ole.exists('PowerPoint Document'): |
| 259 | ppt.value = True | 395 | ppt.value = True |
| 260 | - | ||
| 261 | - def check_visio (self): | ||
| 262 | - visio = Indicator('visio', False, name='Visio Drawing', | ||
| 263 | - description='Contains a VisioDocument stream, very likely to be a Microsoft Visio Drawing.') | 396 | + return ppt |
| 397 | + | ||
| 398 | + def check_visio(self): | ||
| 399 | + """Check whether this file is a visio drawing""" | ||
| 400 | + visio = Indicator( | ||
| 401 | + 'visio', False, name='Visio Drawing', | ||
| 402 | + description='Contains a VisioDocument stream, very likely to be a ' | ||
| 403 | + 'Microsoft Visio Drawing.') | ||
| 264 | self.indicators.append(visio) | 404 | self.indicators.append(visio) |
| 405 | + if not self.ole: | ||
| 406 | + return None | ||
| 265 | if self.ole.exists('VisioDocument'): | 407 | if self.ole.exists('VisioDocument'): |
| 266 | visio.value = True | 408 | visio.value = True |
| 409 | + return visio | ||
| 410 | + | ||
| 411 | + def check_object_pool(self): | ||
| 412 | + """ | ||
| 413 | + Check whether this file contains an ObjectPool stream. | ||
| 414 | + | ||
| 415 | + Such a stream would be a strong indicator for embedded objects or files. | ||
| 267 | 416 | ||
| 268 | - def check_ObjectPool (self): | ||
| 269 | - objpool = Indicator('ObjectPool', False, name='ObjectPool', | ||
| 270 | - description='Contains an ObjectPool stream, very likely to contain embedded OLE objects or files.') | 417 | + :returns: :py:class:`Indicator` for ObjectPool stream or None if file |
| 418 | + was not opened | ||
| 419 | + """ | ||
| 420 | + objpool = Indicator( | ||
| 421 | + 'ObjectPool', False, name='ObjectPool', | ||
| 422 | + description='Contains an ObjectPool stream, very likely to contain ' | ||
| 423 | + 'embedded OLE objects or files.') | ||
| 271 | self.indicators.append(objpool) | 424 | self.indicators.append(objpool) |
| 425 | + if not self.ole: | ||
| 426 | + return None | ||
| 272 | if self.ole.exists('ObjectPool'): | 427 | if self.ole.exists('ObjectPool'): |
| 273 | objpool.value = True | 428 | objpool.value = True |
| 274 | - | ||
| 275 | - | ||
| 276 | - def check_flash (self): | ||
| 277 | - flash = Indicator('flash', 0, _type=int, name='Flash objects', | ||
| 278 | - description='Number of embedded Flash objects (SWF files) detected in OLE streams. Not 100% accurate, there may be false positives.') | 429 | + return objpool |
| 430 | + | ||
| 431 | + def check_flash(self): | ||
| 432 | + """ | ||
| 433 | + Check whether this file contains flash objects | ||
| 434 | + | ||
| 435 | + :returns: :py:class:`Indicator` for count of flash objects or None if | ||
| 436 | + file was not opened | ||
| 437 | + """ | ||
| 438 | + flash = Indicator( | ||
| 439 | + 'flash', 0, _type=int, name='Flash objects', | ||
| 440 | + description='Number of embedded Flash objects (SWF files) detected ' | ||
| 441 | + 'in OLE streams. Not 100% accurate, there may be false ' | ||
| 442 | + 'positives.') | ||
| 279 | self.indicators.append(flash) | 443 | self.indicators.append(flash) |
| 444 | + if not self.ole: | ||
| 445 | + return None | ||
| 280 | for stream in self.ole.listdir(): | 446 | for stream in self.ole.listdir(): |
| 281 | data = self.ole.openstream(stream).read() | 447 | data = self.ole.openstream(stream).read() |
| 282 | found = detect_flash(data) | 448 | found = detect_flash(data) |
| 283 | # just add to the count of Flash objects: | 449 | # just add to the count of Flash objects: |
| 284 | flash.value += len(found) | 450 | flash.value += len(found) |
| 285 | #print stream, found | 451 | #print stream, found |
| 452 | + return flash | ||
| 286 | 453 | ||
| 287 | 454 | ||
| 288 | #=== MAIN ================================================================= | 455 | #=== MAIN ================================================================= |
| 289 | 456 | ||
| 290 | def main(): | 457 | def main(): |
| 458 | + """Called when running this file as script. Shows all info on input file.""" | ||
| 291 | # print banner with version | 459 | # print banner with version |
| 292 | - print ('oleid %s - http://decalage.info/oletools' % __version__) | ||
| 293 | - print ('THIS IS WORK IN PROGRESS - Check updates regularly!') | ||
| 294 | - print ('Please report any issue at https://github.com/decalage2/oletools/issues') | ||
| 295 | - print ('') | 460 | + print('oleid %s - http://decalage.info/oletools' % __version__) |
| 461 | + print('THIS IS WORK IN PROGRESS - Check updates regularly!') | ||
| 462 | + print('Please report any issue at ' | ||
| 463 | + 'https://github.com/decalage2/oletools/issues') | ||
| 464 | + print('') | ||
| 296 | 465 | ||
| 297 | - usage = 'usage: %prog [options] <file>' | ||
| 298 | - parser = optparse.OptionParser(usage=__doc__ + '\n' + usage) | ||
| 299 | -## parser.add_option('-o', '--ole', action='store_true', dest='ole', help='Parse an OLE file (e.g. Word, Excel) to look for SWF in each stream') | 466 | + parser = argparse.ArgumentParser(description=__doc__) |
| 467 | + parser.add_argument('input', type=str, nargs='*', metavar='FILE', | ||
| 468 | + help='Name of files to process') | ||
| 469 | + # parser.add_argument('-o', '--ole', action='store_true', dest='ole', | ||
| 470 | + # help='Parse an OLE file (e.g. Word, Excel) to look for ' | ||
| 471 | + # 'SWF in each stream') | ||
| 300 | 472 | ||
| 301 | - (options, args) = parser.parse_args() | 473 | + args = parser.parse_args() |
| 302 | 474 | ||
| 303 | # Print help if no arguments are passed | 475 | # Print help if no arguments are passed |
| 304 | - if len(args) == 0: | 476 | + if len(args.input) == 0: |
| 305 | parser.print_help() | 477 | parser.print_help() |
| 306 | return | 478 | return |
| 307 | 479 | ||
| 308 | - for filename in args: | 480 | + for filename in args.input: |
| 309 | print('Filename:', filename) | 481 | print('Filename:', filename) |
| 310 | oleid = OleID(filename) | 482 | oleid = OleID(filename) |
| 311 | indicators = oleid.check() | 483 | indicators = oleid.check() |
| 312 | 484 | ||
| 313 | #TODO: add description | 485 | #TODO: add description |
| 314 | #TODO: highlight suspicious indicators | 486 | #TODO: highlight suspicious indicators |
| 315 | - t = prettytable.PrettyTable(['Indicator', 'Value']) | ||
| 316 | - t.align = 'l' | ||
| 317 | - t.max_width = 39 | ||
| 318 | - #t.border = False | 487 | + table = prettytable.PrettyTable(['Indicator', 'Value']) |
| 488 | + table.align = 'l' | ||
| 489 | + table.max_width = 39 | ||
| 490 | + table.border = False | ||
| 319 | 491 | ||
| 320 | for indicator in indicators: | 492 | for indicator in indicators: |
| 321 | #print '%s: %s' % (indicator.name, indicator.value) | 493 | #print '%s: %s' % (indicator.name, indicator.value) |
| 322 | - t.add_row((indicator.name, indicator.value)) | 494 | + table.add_row((indicator.name, indicator.value)) |
| 323 | 495 | ||
| 324 | - print(t) | ||
| 325 | - print ('') | 496 | + print(table) |
| 497 | + print('') | ||
| 326 | 498 | ||
| 327 | if __name__ == '__main__': | 499 | if __name__ == '__main__': |
| 328 | main() | 500 | main() |
oletools/olevba.py
| @@ -14,6 +14,7 @@ Supported formats: | @@ -14,6 +14,7 @@ Supported formats: | ||
| 14 | - Word 2003 XML (.xml) | 14 | - Word 2003 XML (.xml) |
| 15 | - Word/Excel Single File Web Page / MHTML (.mht) | 15 | - Word/Excel Single File Web Page / MHTML (.mht) |
| 16 | - Publisher (.pub) | 16 | - Publisher (.pub) |
| 17 | +- raises an error if run with files encrypted using MS Crypto API RC4 | ||
| 17 | 18 | ||
| 18 | Author: Philippe Lagadec - http://www.decalage.info | 19 | Author: Philippe Lagadec - http://www.decalage.info |
| 19 | License: BSD, see source code or documentation | 20 | License: BSD, see source code or documentation |
| @@ -208,6 +209,7 @@ from __future__ import print_function | @@ -208,6 +209,7 @@ from __future__ import print_function | ||
| 208 | # (issue #283) | 209 | # (issue #283) |
| 209 | # 2018-09-11 v0.54 PL: - olefile is now a dependency | 210 | # 2018-09-11 v0.54 PL: - olefile is now a dependency |
| 210 | # 2018-10-08 PL: - replace backspace before printing to console (issue #358) | 211 | # 2018-10-08 PL: - replace backspace before printing to console (issue #358) |
| 212 | +# 2018-10-25 CH: - detect encryption and raise error if detected | ||
| 211 | 213 | ||
| 212 | __version__ = '0.54dev2' | 214 | __version__ = '0.54dev2' |
| 213 | 215 | ||
| @@ -309,6 +311,8 @@ from pyparsing import \ | @@ -309,6 +311,8 @@ from pyparsing import \ | ||
| 309 | from oletools import ppt_parser | 311 | from oletools import ppt_parser |
| 310 | from oletools import oleform | 312 | from oletools import oleform |
| 311 | from oletools import rtfobj | 313 | from oletools import rtfobj |
| 314 | +from oletools import oleid | ||
| 315 | +from oletools.common.errors import FileIsEncryptedError | ||
| 312 | 316 | ||
| 313 | 317 | ||
| 314 | # monkeypatch email to fix issue #32: | 318 | # monkeypatch email to fix issue #32: |
| @@ -472,6 +476,7 @@ RETURN_OPEN_ERROR = 5 | @@ -472,6 +476,7 @@ RETURN_OPEN_ERROR = 5 | ||
| 472 | RETURN_PARSE_ERROR = 6 | 476 | RETURN_PARSE_ERROR = 6 |
| 473 | RETURN_SEVERAL_ERRS = 7 | 477 | RETURN_SEVERAL_ERRS = 7 |
| 474 | RETURN_UNEXPECTED = 8 | 478 | RETURN_UNEXPECTED = 8 |
| 479 | +RETURN_ENCRYPTED = 9 | ||
| 475 | 480 | ||
| 476 | # MAC codepages (from http://stackoverflow.com/questions/1592925/decoding-mac-os-text-in-python) | 481 | # MAC codepages (from http://stackoverflow.com/questions/1592925/decoding-mac-os-text-in-python) |
| 477 | MAC_CODEPAGES = { | 482 | MAC_CODEPAGES = { |
| @@ -2367,6 +2372,12 @@ class VBA_Parser(object): | @@ -2367,6 +2372,12 @@ class VBA_Parser(object): | ||
| 2367 | # This looks like an OLE file | 2372 | # This looks like an OLE file |
| 2368 | self.open_ole(_file) | 2373 | self.open_ole(_file) |
| 2369 | 2374 | ||
| 2375 | + # check whether file is encrypted (need to do this before try ppt) | ||
| 2376 | + log.debug('Check encryption of ole file') | ||
| 2377 | + crypt_indicator = oleid.OleID(self.ole_file).check_encrypted() | ||
| 2378 | + if crypt_indicator.value: | ||
| 2379 | + raise FileIsEncryptedError(filename) | ||
| 2380 | + | ||
| 2370 | # if this worked, try whether it is a ppt file (special ole file) | 2381 | # if this worked, try whether it is a ppt file (special ole file) |
| 2371 | self.open_ppt() | 2382 | self.open_ppt() |
| 2372 | if self.type is None and is_zipfile(_file): | 2383 | if self.type is None and is_zipfile(_file): |
| @@ -3634,6 +3645,16 @@ def main(cmd_line_args=None): | @@ -3634,6 +3645,16 @@ def main(cmd_line_args=None): | ||
| 3634 | % (filename, exc.orig_exc)) | 3645 | % (filename, exc.orig_exc)) |
| 3635 | return_code = RETURN_PARSE_ERROR if return_code == 0 \ | 3646 | return_code = RETURN_PARSE_ERROR if return_code == 0 \ |
| 3636 | else RETURN_SEVERAL_ERRS | 3647 | else RETURN_SEVERAL_ERRS |
| 3648 | + except FileIsEncryptedError as exc: | ||
| 3649 | + if options.output_mode in ('triage', 'unspecified'): | ||
| 3650 | + print('%-12s %s - File is encrypted' % ('!ERROR', filename)) | ||
| 3651 | + elif options.output_mode == 'json': | ||
| 3652 | + print_json(file=filename, type='error', | ||
| 3653 | + error=type(exc).__name__, message=str(exc)) | ||
| 3654 | + else: | ||
| 3655 | + log.exception('File %s is encrypted!' % (filename)) | ||
| 3656 | + return_code = RETURN_ENCRYPTED if return_code == 0 \ | ||
| 3657 | + else RETURN_SEVERAL_ERRS | ||
| 3637 | # Here we do not close the vba_parser, because process_file may need it below. | 3658 | # Here we do not close the vba_parser, because process_file may need it below. |
| 3638 | 3659 | ||
| 3639 | if options.output_mode == 'triage': | 3660 | if options.output_mode == 'triage': |
oletools/olevba3.py
| @@ -16,6 +16,7 @@ Supported formats: | @@ -16,6 +16,7 @@ Supported formats: | ||
| 16 | - Word 2003 XML (.xml) | 16 | - Word 2003 XML (.xml) |
| 17 | - Word/Excel Single File Web Page / MHTML (.mht) | 17 | - Word/Excel Single File Web Page / MHTML (.mht) |
| 18 | - Publisher (.pub) | 18 | - Publisher (.pub) |
| 19 | +- raises an error if run with files encrypted using MS Crypto API RC4 | ||
| 19 | 20 | ||
| 20 | Author: Philippe Lagadec - http://www.decalage.info | 21 | Author: Philippe Lagadec - http://www.decalage.info |
| 21 | License: BSD, see source code or documentation | 22 | License: BSD, see source code or documentation |
| @@ -207,6 +208,7 @@ from __future__ import print_function | @@ -207,6 +208,7 @@ from __future__ import print_function | ||
| 207 | # 2018-06-11 v0.53.1 MHW: - fixed #320: chr instead of unichr on python 3 | 208 | # 2018-06-11 v0.53.1 MHW: - fixed #320: chr instead of unichr on python 3 |
| 208 | # 2018-06-12 MHW: - fixed #322: import reduce from functools | 209 | # 2018-06-12 MHW: - fixed #322: import reduce from functools |
| 209 | # 2018-09-11 v0.54 PL: - olefile is now a dependency | 210 | # 2018-09-11 v0.54 PL: - olefile is now a dependency |
| 211 | +# 2018-10-25 CH: - detect encryption and raise error if detected | ||
| 210 | 212 | ||
| 211 | __version__ = '0.54dev1' | 213 | __version__ = '0.54dev1' |
| 212 | 214 | ||
| @@ -247,7 +249,6 @@ import os | @@ -247,7 +249,6 @@ import os | ||
| 247 | import logging | 249 | import logging |
| 248 | import struct | 250 | import struct |
| 249 | from _io import StringIO,BytesIO | 251 | from _io import StringIO,BytesIO |
| 250 | -from oletools import rtfobj | ||
| 251 | import math | 252 | import math |
| 252 | import zipfile | 253 | import zipfile |
| 253 | import re | 254 | import re |
| @@ -298,6 +299,9 @@ from pyparsing import \ | @@ -298,6 +299,9 @@ from pyparsing import \ | ||
| 298 | alphanums, alphas, hexnums,nums, opAssoc, srange, \ | 299 | alphanums, alphas, hexnums,nums, opAssoc, srange, \ |
| 299 | infixNotation, ParserElement | 300 | infixNotation, ParserElement |
| 300 | import oletools.ppt_parser as ppt_parser | 301 | import oletools.ppt_parser as ppt_parser |
| 302 | +from oletools import rtfobj | ||
| 303 | +from oletools import oleid | ||
| 304 | +from oletools.common.errors import FileIsEncryptedError | ||
| 301 | 305 | ||
| 302 | # monkeypatch email to fix issue #32: | 306 | # monkeypatch email to fix issue #32: |
| 303 | # allow header lines without ":" | 307 | # allow header lines without ":" |
| @@ -479,6 +483,7 @@ RETURN_OPEN_ERROR = 5 | @@ -479,6 +483,7 @@ RETURN_OPEN_ERROR = 5 | ||
| 479 | RETURN_PARSE_ERROR = 6 | 483 | RETURN_PARSE_ERROR = 6 |
| 480 | RETURN_SEVERAL_ERRS = 7 | 484 | RETURN_SEVERAL_ERRS = 7 |
| 481 | RETURN_UNEXPECTED = 8 | 485 | RETURN_UNEXPECTED = 8 |
| 486 | +RETURN_ENCRYPTED = 9 | ||
| 482 | 487 | ||
| 483 | # MAC codepages (from http://stackoverflow.com/questions/1592925/decoding-mac-os-text-in-python) | 488 | # MAC codepages (from http://stackoverflow.com/questions/1592925/decoding-mac-os-text-in-python) |
| 484 | MAC_CODEPAGES = { | 489 | MAC_CODEPAGES = { |
| @@ -2360,6 +2365,12 @@ class VBA_Parser(object): | @@ -2360,6 +2365,12 @@ class VBA_Parser(object): | ||
| 2360 | # This looks like an OLE file | 2365 | # This looks like an OLE file |
| 2361 | self.open_ole(_file) | 2366 | self.open_ole(_file) |
| 2362 | 2367 | ||
| 2368 | + # check whether file is encrypted (need to do this before try ppt) | ||
| 2369 | + log.debug('Check encryption of ole file') | ||
| 2370 | + crypt_indicator = oleid.OleID(self.ole_file).check_encrypted() | ||
| 2371 | + if crypt_indicator.value: | ||
| 2372 | + raise FileIsEncryptedError(filename) | ||
| 2373 | + | ||
| 2363 | # if this worked, try whether it is a ppt file (special ole file) | 2374 | # if this worked, try whether it is a ppt file (special ole file) |
| 2364 | self.open_ppt() | 2375 | self.open_ppt() |
| 2365 | if self.type is None and is_zipfile(_file): | 2376 | if self.type is None and is_zipfile(_file): |
| @@ -3594,6 +3605,18 @@ def main(cmd_line_args=None): | @@ -3594,6 +3605,18 @@ def main(cmd_line_args=None): | ||
| 3594 | % (filename, exc.orig_exc)) | 3605 | % (filename, exc.orig_exc)) |
| 3595 | return_code = RETURN_PARSE_ERROR if return_code == 0 \ | 3606 | return_code = RETURN_PARSE_ERROR if return_code == 0 \ |
| 3596 | else RETURN_SEVERAL_ERRS | 3607 | else RETURN_SEVERAL_ERRS |
| 3608 | + except FileIsEncryptedError as exc: | ||
| 3609 | + if options.output_mode in ('triage', 'unspecified'): | ||
| 3610 | + print('%-12s %s - File is encrypted' % ('!ERROR', filename)) | ||
| 3611 | + elif options.output_mode == 'json': | ||
| 3612 | + print_json(file=filename, type='error', | ||
| 3613 | + error=type(exc).__name__, message=str(exc)) | ||
| 3614 | + else: | ||
| 3615 | + log.exception('File %s is encrypted!' % (filename)) | ||
| 3616 | + return_code = RETURN_ENCRYPTED if return_code == 0 \ | ||
| 3617 | + else RETURN_SEVERAL_ERRS | ||
| 3618 | + # Here we do not close the vba_parser, because process_file may need it below. | ||
| 3619 | + | ||
| 3597 | finally: | 3620 | finally: |
| 3598 | if vba_parser is not None: | 3621 | if vba_parser is not None: |
| 3599 | vba_parser.close() | 3622 | vba_parser.close() |
oletools/ooxml.py
| @@ -9,6 +9,8 @@ See also: Notes on Microsoft's implementation of ECMA-376: [MS-0E376] | @@ -9,6 +9,8 @@ See also: Notes on Microsoft's implementation of ECMA-376: [MS-0E376] | ||
| 9 | 9 | ||
| 10 | TODO: may have to tell apart single xml types: office2003 looks much different | 10 | TODO: may have to tell apart single xml types: office2003 looks much different |
| 11 | than 2006+ --> DOCTYPE_*_XML2003 | 11 | than 2006+ --> DOCTYPE_*_XML2003 |
| 12 | +TODO: check what is duplicate here with oleid, maybe merge some day? | ||
| 13 | +TODO: "xml2003" == "flatopc"? | ||
| 12 | 14 | ||
| 13 | .. codeauthor:: Intra2net AG <info@intra2net> | 15 | .. codeauthor:: Intra2net AG <info@intra2net> |
| 14 | """ | 16 | """ |
oletools/ppt_record_parser.py
| @@ -63,6 +63,7 @@ except ImportError: | @@ -63,6 +63,7 @@ except ImportError: | ||
| 63 | sys.path.insert(0, PARENT_DIR) | 63 | sys.path.insert(0, PARENT_DIR) |
| 64 | del PARENT_DIR | 64 | del PARENT_DIR |
| 65 | from oletools import record_base | 65 | from oletools import record_base |
| 66 | +from oletools.common.errors import FileIsEncryptedError | ||
| 66 | 67 | ||
| 67 | 68 | ||
| 68 | # types of relevant records (there are much more than listed here) | 69 | # types of relevant records (there are much more than listed here) |
| @@ -147,13 +148,17 @@ def is_ppt(filename): | @@ -147,13 +148,17 @@ def is_ppt(filename): | ||
| 147 | 148 | ||
| 148 | Param filename can be anything that OleFileIO constructor accepts: name of | 149 | Param filename can be anything that OleFileIO constructor accepts: name of |
| 149 | file or file data or data stream. | 150 | file or file data or data stream. |
| 151 | + | ||
| 152 | + see also: oleid.OleID.check_powerpoint | ||
| 150 | """ | 153 | """ |
| 151 | have_current_user = False | 154 | have_current_user = False |
| 152 | have_user_edit = False | 155 | have_user_edit = False |
| 153 | have_persist_dir = False | 156 | have_persist_dir = False |
| 154 | have_document_container = False | 157 | have_document_container = False |
| 158 | + ppt_file = None | ||
| 155 | try: | 159 | try: |
| 156 | - for stream in PptFile(filename).iter_streams(): | 160 | + ppt_file = PptFile(filename) |
| 161 | + for stream in ppt_file.iter_streams(): | ||
| 157 | if stream.name == 'Current User': | 162 | if stream.name == 'Current User': |
| 158 | for record in stream.iter_records(): | 163 | for record in stream.iter_records(): |
| 159 | if isinstance(record, PptRecordCurrentUser): | 164 | if isinstance(record, PptRecordCurrentUser): |
| @@ -176,6 +181,11 @@ def is_ppt(filename): | @@ -176,6 +181,11 @@ def is_ppt(filename): | ||
| 176 | return True | 181 | return True |
| 177 | else: # ignore other streams/storages since they are optional | 182 | else: # ignore other streams/storages since they are optional |
| 178 | continue | 183 | continue |
| 184 | + except FileIsEncryptedError: | ||
| 185 | + assert ppt_file is not None, \ | ||
| 186 | + 'Encryption error should not be raised from just opening OLE file.' | ||
| 187 | + # just rely on stream names, copied from oleid | ||
| 188 | + return ppt_file.exists('PowerPoint Document') | ||
| 179 | except Exception: | 189 | except Exception: |
| 180 | pass | 190 | pass |
| 181 | return False | 191 | return False |
oletools/record_base.py
| @@ -44,6 +44,7 @@ __version__ = '0.54dev1' | @@ -44,6 +44,7 @@ __version__ = '0.54dev1' | ||
| 44 | # TODO: | 44 | # TODO: |
| 45 | # - read DocumentSummaryInformation first to get more info about streams | 45 | # - read DocumentSummaryInformation first to get more info about streams |
| 46 | # (maybe content type or so; identify streams that are never record-based) | 46 | # (maybe content type or so; identify streams that are never record-based) |
| 47 | +# Or use oleid to avoid same functionality in several files | ||
| 47 | # - think about integrating this with olefile itself | 48 | # - think about integrating this with olefile itself |
| 48 | 49 | ||
| 49 | # ----------------------------------------------------------------------------- | 50 | # ----------------------------------------------------------------------------- |
| @@ -62,6 +63,18 @@ import logging | @@ -62,6 +63,18 @@ import logging | ||
| 62 | 63 | ||
| 63 | import olefile | 64 | import olefile |
| 64 | 65 | ||
| 66 | +try: | ||
| 67 | + from oletools.common.errors import FileIsEncryptedError | ||
| 68 | +except ImportError: | ||
| 69 | + # little hack to allow absolute imports even if oletools is not installed. | ||
| 70 | + PARENT_DIR = os.path.normpath(os.path.dirname(os.path.dirname( | ||
| 71 | + os.path.abspath(__file__)))) | ||
| 72 | + if PARENT_DIR not in sys.path: | ||
| 73 | + sys.path.insert(0, PARENT_DIR) | ||
| 74 | + del PARENT_DIR | ||
| 75 | + from oletools.common.errors import FileIsEncryptedError | ||
| 76 | +from oletools import oleid | ||
| 77 | + | ||
| 65 | 78 | ||
| 66 | ############################################################################### | 79 | ############################################################################### |
| 67 | # Helpers | 80 | # Helpers |
| @@ -111,6 +124,12 @@ class OleRecordFile(olefile.OleFileIO): | @@ -111,6 +124,12 @@ class OleRecordFile(olefile.OleFileIO): | ||
| 111 | Subclass of OleFileIO! | 124 | Subclass of OleFileIO! |
| 112 | """ | 125 | """ |
| 113 | 126 | ||
| 127 | + def open(self, filename, *args, **kwargs): | ||
| 128 | + """Call OleFileIO.open and remember whether the file is encrypted.""" | ||
| 129 | + #super(OleRecordFile, self).open(filename, *args, **kwargs) | ||
| 130 | + OleFileIO.open(self, filename, *args, **kwargs) | ||
| 131 | + self.is_encrypted = oleid.OleID(self).check_encrypted().value | ||
| 132 | + | ||
| 114 | @classmethod | 133 | @classmethod |
| 115 | def stream_class_for_name(cls, stream_name): | 134 | def stream_class_for_name(cls, stream_name): |
| 116 | """ helper for iter_streams, must be overwritten in subclasses | 135 | """ helper for iter_streams, must be overwritten in subclasses |
| @@ -142,7 +161,8 @@ class OleRecordFile(olefile.OleFileIO): | @@ -142,7 +161,8 @@ class OleRecordFile(olefile.OleFileIO): | ||
| 142 | stream = clz(self._open(direntry.isectStart, direntry.size), | 161 | stream = clz(self._open(direntry.isectStart, direntry.size), |
| 143 | direntry.size, | 162 | direntry.size, |
| 144 | None if is_orphan else direntry.name, | 163 | None if is_orphan else direntry.name, |
| 145 | - direntry.entry_type) | 164 | + direntry.entry_type, |
| 165 | + self.is_encrypted) | ||
| 146 | yield stream | 166 | yield stream |
| 147 | stream.close() | 167 | stream.close() |
| 148 | 168 | ||
| @@ -155,13 +175,14 @@ class OleRecordStream(object): | @@ -155,13 +175,14 @@ class OleRecordStream(object): | ||
| 155 | abstract base class | 175 | abstract base class |
| 156 | """ | 176 | """ |
| 157 | 177 | ||
| 158 | - def __init__(self, stream, size, name, stream_type): | 178 | + def __init__(self, stream, size, name, stream_type, is_encrypted=False): |
| 159 | self.stream = stream | 179 | self.stream = stream |
| 160 | self.size = size | 180 | self.size = size |
| 161 | self.name = name | 181 | self.name = name |
| 162 | if stream_type not in ENTRY_TYPE2STR: | 182 | if stream_type not in ENTRY_TYPE2STR: |
| 163 | raise ValueError('Unknown stream type: {0}'.format(stream_type)) | 183 | raise ValueError('Unknown stream type: {0}'.format(stream_type)) |
| 164 | self.stream_type = stream_type | 184 | self.stream_type = stream_type |
| 185 | + self.is_encrypted = is_encrypted | ||
| 165 | 186 | ||
| 166 | def read_record_head(self): | 187 | def read_record_head(self): |
| 167 | """ read first few bytes of record to determine size and type | 188 | """ read first few bytes of record to determine size and type |
| @@ -190,6 +211,9 @@ class OleRecordStream(object): | @@ -190,6 +211,9 @@ class OleRecordStream(object): | ||
| 190 | 211 | ||
| 191 | Stream must be positioned at start of records (e.g. start of stream). | 212 | Stream must be positioned at start of records (e.g. start of stream). |
| 192 | """ | 213 | """ |
| 214 | + if self.is_encrypted: | ||
| 215 | + raise FileIsEncryptedError() | ||
| 216 | + | ||
| 193 | while True: | 217 | while True: |
| 194 | # unpacking as in olevba._extract_vba | 218 | # unpacking as in olevba._extract_vba |
| 195 | pos = self.stream.tell() | 219 | pos = self.stream.tell() |
| @@ -234,6 +258,8 @@ class OleSummaryInformationStream(OleRecordStream): | @@ -234,6 +258,8 @@ class OleSummaryInformationStream(OleRecordStream): | ||
| 234 | 258 | ||
| 235 | Do nothing so far. OleFileIO reads quite some info from this. For more info | 259 | Do nothing so far. OleFileIO reads quite some info from this. For more info |
| 236 | see [MS-OSHARED] 2.3.3 and [MS-OLEPS] 2.21 and references therein. | 260 | see [MS-OSHARED] 2.3.3 and [MS-OLEPS] 2.21 and references therein. |
| 261 | + | ||
| 262 | + See also: info read in oleid.py. | ||
| 237 | """ | 263 | """ |
| 238 | def iter_records(self, fill_data=False): | 264 | def iter_records(self, fill_data=False): |
| 239 | """ yields nothing, stops at once """ | 265 | """ yields nothing, stops at once """ |
oletools/xls_parser.py
| @@ -86,14 +86,16 @@ def is_xls(filename): | @@ -86,14 +86,16 @@ def is_xls(filename): | ||
| 86 | returns True if given file is an ole file and contains a Workbook stream | 86 | returns True if given file is an ole file and contains a Workbook stream |
| 87 | 87 | ||
| 88 | todo: could further check that workbook stream starts with a globals | 88 | todo: could further check that workbook stream starts with a globals |
| 89 | - substream | 89 | + substream. |
| 90 | + See also: oleid.OleID.check_excel | ||
| 90 | """ | 91 | """ |
| 91 | try: | 92 | try: |
| 92 | for stream in XlsFile(filename).iter_streams(): | 93 | for stream in XlsFile(filename).iter_streams(): |
| 93 | if isinstance(stream, WorkbookStream): | 94 | if isinstance(stream, WorkbookStream): |
| 94 | return True | 95 | return True |
| 95 | except Exception: | 96 | except Exception: |
| 96 | - return False | 97 | + pass |
| 98 | + return False | ||
| 97 | 99 | ||
| 98 | 100 | ||
| 99 | def read_unicode(data, start_idx, n_chars): | 101 | def read_unicode(data, start_idx, n_chars): |
| @@ -130,6 +132,8 @@ class XlsFile(record_base.OleRecordFile): | @@ -130,6 +132,8 @@ class XlsFile(record_base.OleRecordFile): | ||
| 130 | @classmethod | 132 | @classmethod |
| 131 | def stream_class_for_name(cls, stream_name): | 133 | def stream_class_for_name(cls, stream_name): |
| 132 | """ helper for iter_streams """ | 134 | """ helper for iter_streams """ |
| 135 | + if stream_name == 'Workbook': | ||
| 136 | + return WorkbookStream | ||
| 133 | return XlsStream | 137 | return XlsStream |
| 134 | 138 | ||
| 135 | 139 |
tests/msodde/test_basic.py
| @@ -11,6 +11,7 @@ from __future__ import print_function | @@ -11,6 +11,7 @@ from __future__ import print_function | ||
| 11 | import unittest | 11 | import unittest |
| 12 | from oletools import msodde | 12 | from oletools import msodde |
| 13 | from tests.test_utils import DATA_BASE_DIR as BASE_DIR | 13 | from tests.test_utils import DATA_BASE_DIR as BASE_DIR |
| 14 | +import os | ||
| 14 | from os.path import join | 15 | from os.path import join |
| 15 | from traceback import print_exc | 16 | from traceback import print_exc |
| 16 | 17 | ||
| @@ -55,6 +56,20 @@ class TestReturnCode(unittest.TestCase): | @@ -55,6 +56,20 @@ class TestReturnCode(unittest.TestCase): | ||
| 55 | """ check that text file argument leads to non-zero exit status """ | 56 | """ check that text file argument leads to non-zero exit status """ |
| 56 | self.do_test_validity(join(BASE_DIR, 'basic/text'), True) | 57 | self.do_test_validity(join(BASE_DIR, 'basic/text'), True) |
| 57 | 58 | ||
| 59 | + def test_encrypted(self): | ||
| 60 | + """ | ||
| 61 | + check that encrypted files lead to non-zero exit status | ||
| 62 | + | ||
| 63 | + Currently, only the encryption applied by Office 2010 (CryptoApi RC4 | ||
| 64 | + Encryption) is tested. | ||
| 65 | + """ | ||
| 66 | + CRYPT_DIR = join(BASE_DIR, 'encrypted') | ||
| 67 | + ADD_ARGS = '', '-j', '-d', '-f', '-a' | ||
| 68 | + for filename in os.listdir(CRYPT_DIR): | ||
| 69 | + full_name = join(CRYPT_DIR, filename) | ||
| 70 | + for args in ADD_ARGS: | ||
| 71 | + self.do_test_validity(args + ' ' + full_name, True) | ||
| 72 | + | ||
| 58 | def do_test_validity(self, args, expect_error=False): | 73 | def do_test_validity(self, args, expect_error=False): |
| 59 | """ helper for test_valid_doc[x] """ | 74 | """ helper for test_valid_doc[x] """ |
| 60 | have_exception = False | 75 | have_exception = False |
tests/oleid/test_basic.py
0 → 100644
| 1 | +""" | ||
| 2 | +Test basic functionality of oleid | ||
| 3 | + | ||
| 4 | +Should work with python2 and python3! | ||
| 5 | +""" | ||
| 6 | + | ||
| 7 | +import unittest | ||
| 8 | +import os | ||
| 9 | +from os.path import join, relpath, splitext | ||
| 10 | +from oletools import oleid | ||
| 11 | + | ||
| 12 | +# Directory with test data, independent of current working directory | ||
| 13 | +from tests.test_utils import DATA_BASE_DIR | ||
| 14 | + | ||
| 15 | + | ||
| 16 | +class TestOleIDBasic(unittest.TestCase): | ||
| 17 | + """Test basic functionality of OleID""" | ||
| 18 | + | ||
| 19 | + def test_all(self): | ||
| 20 | + """Run all files in test-data through oleid and compare to known output""" | ||
| 21 | + # this relies on order of indicators being constant, could relax that | ||
| 22 | + # Also requires that files have the correct suffixes (no rtf in doc) | ||
| 23 | + NON_OLE_SUFFIXES = ('.xml', '.csv', '.rtf', '') | ||
| 24 | + NON_OLE_VALUES = (False, ) | ||
| 25 | + WORD = b'Microsoft Office Word' | ||
| 26 | + PPT = b'Microsoft Office PowerPoint' | ||
| 27 | + EXCEL = b'Microsoft Excel' | ||
| 28 | + CRYPT = (True, False, 'unknown', True, False, False, False, False, | ||
| 29 | + False, False, 0) | ||
| 30 | + OLE_VALUES = { | ||
| 31 | + 'oleobj/sample_with_lnk_file.doc': (True, True, WORD, False, True, | ||
| 32 | + False, False, False, False, | ||
| 33 | + True, 0), | ||
| 34 | + 'oleobj/embedded-simple-2007.xlsb': (False,), | ||
| 35 | + 'oleobj/embedded-simple-2007.docm': (False,), | ||
| 36 | + 'oleobj/embedded-simple-2007.xltx': (False,), | ||
| 37 | + 'oleobj/embedded-simple-2007.xlam': (False,), | ||
| 38 | + 'oleobj/embedded-simple-2007.dotm': (False,), | ||
| 39 | + 'oleobj/sample_with_lnk_file.ppt': (True, True, PPT, False, False, | ||
| 40 | + False, False, True, False, | ||
| 41 | + False, 0), | ||
| 42 | + 'oleobj/embedded-simple-2007.xlsx': (False,), | ||
| 43 | + 'oleobj/embedded-simple-2007.xlsm': (False,), | ||
| 44 | + 'oleobj/embedded-simple-2007.ppsx': (False,), | ||
| 45 | + 'oleobj/embedded-simple-2007.pps': (True, True, PPT, False, False, | ||
| 46 | + False, False, True, False, | ||
| 47 | + False, 0), | ||
| 48 | + 'oleobj/embedded-simple-2007.xla': (True, True, EXCEL, False, | ||
| 49 | + False, False, True, False, | ||
| 50 | + False, False, 0), | ||
| 51 | + 'oleobj/sample_with_calc_embedded.doc': (True, True, WORD, False, | ||
| 52 | + True, False, False, False, | ||
| 53 | + False, True, 0), | ||
| 54 | + 'oleobj/embedded-unicode-2007.docx': (False,), | ||
| 55 | + 'oleobj/embedded-unicode.doc': (True, True, WORD, False, True, | ||
| 56 | + False, False, False, False, True, | ||
| 57 | + 0), | ||
| 58 | + 'oleobj/embedded-simple-2007.doc': (True, True, WORD, False, True, | ||
| 59 | + False, False, False, False, | ||
| 60 | + True, 0), | ||
| 61 | + 'oleobj/embedded-simple-2007.xls': (True, True, EXCEL, False, | ||
| 62 | + False, False, True, False, | ||
| 63 | + False, False, 0), | ||
| 64 | + 'oleobj/embedded-simple-2007.dot': (True, True, WORD, False, True, | ||
| 65 | + False, False, False, False, | ||
| 66 | + True, 0), | ||
| 67 | + 'oleobj/sample_with_lnk_to_calc.doc': (True, True, WORD, False, | ||
| 68 | + True, False, False, False, | ||
| 69 | + False, True, 0), | ||
| 70 | + 'oleobj/embedded-simple-2007.ppt': (True, True, PPT, False, False, | ||
| 71 | + False, False, True, False, | ||
| 72 | + False, 0), | ||
| 73 | + 'oleobj/sample_with_lnk_file.pps': (True, True, PPT, False, False, | ||
| 74 | + False, False, True, False, | ||
| 75 | + False, 0), | ||
| 76 | + 'oleobj/embedded-simple-2007.pptx': (False,), | ||
| 77 | + 'oleobj/embedded-simple-2007.ppsm': (False,), | ||
| 78 | + 'oleobj/embedded-simple-2007.dotx': (False,), | ||
| 79 | + 'oleobj/embedded-simple-2007.pptm': (False,), | ||
| 80 | + 'oleobj/embedded-simple-2007.xlt': (True, True, EXCEL, False, | ||
| 81 | + False, False, True, False, | ||
| 82 | + False, False, 0), | ||
| 83 | + 'oleobj/embedded-simple-2007.docx': (False,), | ||
| 84 | + 'oleobj/embedded-simple-2007.potx': (False,), | ||
| 85 | + 'oleobj/embedded-simple-2007.pot': (True, True, PPT, False, False, | ||
| 86 | + False, False, True, False, | ||
| 87 | + False, 0), | ||
| 88 | + 'oleobj/embedded-simple-2007.xltm': (False,), | ||
| 89 | + 'oleobj/embedded-simple-2007.potm': (False,), | ||
| 90 | + 'encrypted/encrypted.xlsx': CRYPT, | ||
| 91 | + 'encrypted/encrypted.docm': CRYPT, | ||
| 92 | + 'encrypted/encrypted.docx': CRYPT, | ||
| 93 | + 'encrypted/encrypted.pptm': CRYPT, | ||
| 94 | + 'encrypted/encrypted.xlsb': CRYPT, | ||
| 95 | + 'encrypted/encrypted.xls': (True, True, EXCEL, True, False, False, | ||
| 96 | + True, False, False, False, 0), | ||
| 97 | + 'encrypted/encrypted.ppt': (True, False, 'unknown', True, False, | ||
| 98 | + False, False, True, False, False, 0), | ||
| 99 | + 'encrypted/encrypted.pptx': CRYPT, | ||
| 100 | + 'encrypted/encrypted.xlsm': CRYPT, | ||
| 101 | + 'encrypted/encrypted.doc': (True, True, WORD, True, True, False, | ||
| 102 | + False, False, False, False, 0), | ||
| 103 | + 'msodde/harmless-clean.docm': (False,), | ||
| 104 | + 'msodde/dde-in-csv.csv': (False,), | ||
| 105 | + 'msodde/dde-test-from-office2013-utf_16le-korean.doc': | ||
| 106 | + (True, True, WORD, False, True, False, False, False, False, | ||
| 107 | + False, 0), | ||
| 108 | + 'msodde/harmless-clean.doc': (True, True, WORD, False, True, False, | ||
| 109 | + False, False, False, False, 0), | ||
| 110 | + 'msodde/dde-test.docm': (False,), | ||
| 111 | + 'msodde/dde-test.xlsb': (False,), | ||
| 112 | + 'msodde/dde-test.xlsm': (False,), | ||
| 113 | + 'msodde/dde-test.docx': (False,), | ||
| 114 | + 'msodde/dde-test.xlsx': (False,), | ||
| 115 | + 'msodde/dde-test-from-office2003.doc': (True, True, WORD, False, | ||
| 116 | + True, False, False, False, | ||
| 117 | + False, False, 0), | ||
| 118 | + 'msodde/dde-test-from-office2016.doc': (True, True, WORD, False, | ||
| 119 | + True, False, False, False, | ||
| 120 | + False, False, 0), | ||
| 121 | + 'msodde/harmless-clean.docx': (False,), | ||
| 122 | + 'oleform/oleform-PR314.docm': (False,), | ||
| 123 | + 'basic/encrypted.docx': CRYPT, | ||
| 124 | + } | ||
| 125 | + | ||
| 126 | + indicator_names = [] | ||
| 127 | + for base_dir, _, files in os.walk(DATA_BASE_DIR): | ||
| 128 | + for filename in files: | ||
| 129 | + full_path = join(base_dir, filename) | ||
| 130 | + name = relpath(full_path, DATA_BASE_DIR) | ||
| 131 | + values = tuple(indicator.value for indicator in | ||
| 132 | + oleid.OleID(full_path).check()) | ||
| 133 | + if len(indicator_names) < 2: # not initialized with ole yet | ||
| 134 | + indicator_names = tuple(indicator.name for indicator in | ||
| 135 | + oleid.OleID(full_path).check()) | ||
| 136 | + suffix = splitext(filename)[1] | ||
| 137 | + if suffix in NON_OLE_SUFFIXES: | ||
| 138 | + self.assertEqual(values, NON_OLE_VALUES, | ||
| 139 | + msg='For non-ole file {} expected {}, ' | ||
| 140 | + 'not {}'.format(name, NON_OLE_VALUES, | ||
| 141 | + values)) | ||
| 142 | + continue | ||
| 143 | + try: | ||
| 144 | + self.assertEqual(values, OLE_VALUES[name], | ||
| 145 | + msg='Wrong detail values for {}:\n' | ||
| 146 | + ' Names {}\n Found {}\n Expect {}' | ||
| 147 | + .format(name, indicator_names, values, | ||
| 148 | + OLE_VALUES[name])) | ||
| 149 | + except KeyError: | ||
| 150 | + print('Should add oleid output for {} to {} ({})' | ||
| 151 | + .format(name, __name__, values[3:])) | ||
| 152 | + | ||
| 153 | +# just in case somebody calls this file as a script | ||
| 154 | +if __name__ == '__main__': | ||
| 155 | + unittest.main() |
tests/olevba/__init__.py
0 → 100644
tests/olevba/test_basic.py
0 → 100644
| 1 | +""" | ||
| 2 | +Test basic functionality of olevba[3] | ||
| 3 | +""" | ||
| 4 | + | ||
| 5 | +import unittest | ||
| 6 | +import sys | ||
| 7 | +if sys.version_info.major <= 2: | ||
| 8 | + from oletools import olevba | ||
| 9 | +else: | ||
| 10 | + from oletools import olevba3 as olevba | ||
| 11 | +import os | ||
| 12 | +from os.path import join | ||
| 13 | + | ||
| 14 | +# Directory with test data, independent of current working directory | ||
| 15 | +from tests.test_utils import DATA_BASE_DIR | ||
| 16 | + | ||
| 17 | + | ||
class TestOlevbaBasic(unittest.TestCase):
    """Tests olevba basic functionality"""

    def test_crypt_return(self):
        """
        Test that olevba signals encrypted files through its return code.

        Runs ``olevba.main`` on every file found in the ``encrypted``
        test-data directory, once for each option set in ``ADD_ARGS``, and
        expects the return code ``CRYPT_RETURN_CODE`` every time. The code is
        taken either from the return value of ``main`` or, if ``main`` exits
        via ``SystemExit``, from the exception's ``code`` attribute.

        Currently, only the encryption applied by Office 2010 (CryptoApi RC4
        Encryption) is tested.
        """
        CRYPT_DIR = join(DATA_BASE_DIR, 'encrypted')
        CRYPT_RETURN_CODE = 9
        ADD_ARGS = [], ['-d', ], ['-a', ], ['-j', ], ['-t', ]

        # os.listdir gives entries in arbitrary order; sort them so that
        # test runs (and the first reported failure) are reproducible
        filenames = sorted(os.listdir(CRYPT_DIR))

        # without any sample the loops below would pass without testing
        # anything, so make a missing/empty data dir a visible failure
        self.assertTrue(filenames,
                        msg='No test files found in {}'.format(CRYPT_DIR))

        for filename in filenames:
            full_name = join(CRYPT_DIR, filename)
            for args in ADD_ARGS:
                try:
                    ret_code = olevba.main(args + [full_name, ])
                except SystemExit as se:
                    ret_code = se.code
                # code can be None on both paths (plain return from main or
                # sys.exit() without argument); treat that as 0
                ret_code = ret_code or 0
                self.assertEqual(ret_code, CRYPT_RETURN_CODE,
                                 msg='Wrong return code {} for args {}'
                                     .format(ret_code, args + [filename, ]))
| 42 | + | ||
| 43 | +# just in case somebody calls this file as a script | ||
| 44 | +if __name__ == '__main__': | ||
| 45 | + unittest.main() |
tests/test-data/encrypted/encrypted.doc
0 → 100644
No preview for this file type
tests/test-data/encrypted/encrypted.docm
0 → 100644
No preview for this file type
tests/test-data/encrypted/encrypted.docx
0 → 100644
No preview for this file type
tests/test-data/encrypted/encrypted.ppt
0 → 100644
No preview for this file type
tests/test-data/encrypted/encrypted.pptm
0 → 100644
No preview for this file type
tests/test-data/encrypted/encrypted.pptx
0 → 100644
No preview for this file type
tests/test-data/encrypted/encrypted.xls
0 → 100644
No preview for this file type
tests/test-data/encrypted/encrypted.xlsb
0 → 100644
No preview for this file type
tests/test-data/encrypted/encrypted.xlsm
0 → 100644
No preview for this file type
tests/test-data/encrypted/encrypted.xlsx
0 → 100644
No preview for this file type