Commit e8b6dd46b5b6221d8ff6725862f3b14fa15f2128
Committed by
GitHub
Merge pull request #362 from christian-intra2net/encrypt-detect-and-raise
Encrypt detect and raise
Showing
23 changed files
with
621 additions
and
119 deletions
oletools/common/errors.py
0 → 100644
| 1 | +""" | |
| 2 | +Errors used in several tools to avoid duplication | |
| 3 | + | |
| 4 | +.. codeauthor:: Intra2net AG <info@intra2net.com> | |
| 5 | +""" | |
| 6 | + | |
| 7 | +class FileIsEncryptedError(ValueError): | |
| 8 | + """Exception raised if a file is encrypted and the tool cannot deal with it.""" | |
| 9 | + # see also: same class in olevba[3] and record_base | |
| 10 | + def __init__(self, filename=None): | |
| 11 | + super(FileIsEncryptedError, self).__init__( | |
| 12 | + 'Office file {}is encrypted, not yet supported' | |
| 13 | + .format('' if filename is None else filename + ' ')) | ... | ... |
oletools/msodde.py
| ... | ... | @@ -11,6 +11,7 @@ Supported formats: |
| 11 | 11 | - RTF |
| 12 | 12 | - CSV (exported from / imported into Excel) |
| 13 | 13 | - XML (exported from Word 2003, Word 2007+, Excel 2003, (Excel 2007+?) |
| 14 | +- raises an error if run with files encrypted using MS Crypto API RC4 | |
| 14 | 15 | |
| 15 | 16 | Author: Philippe Lagadec - http://www.decalage.info |
| 16 | 17 | License: BSD, see source code or documentation |
| ... | ... | @@ -61,7 +62,9 @@ import olefile |
| 61 | 62 | from oletools import ooxml |
| 62 | 63 | from oletools import xls_parser |
| 63 | 64 | from oletools import rtfobj |
| 65 | +from oletools import oleid | |
| 64 | 66 | from oletools.common.log_helper import log_helper |
| 67 | +from oletools.common.errors import FileIsEncryptedError | |
| 65 | 68 | |
| 66 | 69 | # ----------------------------------------------------------------------------- |
| 67 | 70 | # CHANGELOG: |
| ... | ... | @@ -84,6 +87,7 @@ from oletools.common.log_helper import log_helper |
| 84 | 87 | # 2018-01-10 CH: - add single-xml files (Word 2003/2007+ / Excel 2003) |
| 85 | 88 | # 2018-03-21 CH: - added detection for various CSV formulas (issue #259) |
| 86 | 89 | # 2018-09-11 v0.54 PL: - olefile is now a dependency |
| 90 | +# 2018-10-25 CH: - detect encryption and raise error if detected | |
| 87 | 91 | |
| 88 | 92 | __version__ = '0.54dev1' |
| 89 | 93 | |
| ... | ... | @@ -438,17 +442,18 @@ def process_doc_stream(stream): |
| 438 | 442 | return result_parts |
| 439 | 443 | |
| 440 | 444 | |
| 441 | -def process_doc(filepath): | |
| 445 | +def process_doc(ole): | |
| 442 | 446 | """ |
| 443 | 447 | find dde links in word ole (.doc/.dot) file |
| 444 | 448 | |
| 449 | +Checks whether the file is ppt and returns empty immediately in that case | |
| 450 | + (ppt files cannot contain DDE-links to my knowledge) | |
| 451 | + | |
| 445 | 452 | like process_xml, returns a concatenated unicode string of dde links or |
| 446 | 453 | empty if none were found. dde-links will still begin with the dde[auto] key |
| 447 | 454 | word (possibly after some whitespace) |
| 448 | 455 | """ |
| 449 | 456 | logger.debug('process_doc') |
| 450 | - ole = olefile.OleFileIO(filepath, path_encoding=None) | |
| 451 | - | |
| 452 | 457 | links = [] |
| 453 | 458 | for sid, direntry in enumerate(ole.direntries): |
| 454 | 459 | is_orphan = direntry is None |
| ... | ... | @@ -703,8 +708,8 @@ def process_xlsx(filepath): |
| 703 | 708 | log_func = logger.debug |
| 704 | 709 | else: # default |
| 705 | 710 | log_func = logger.info |
| 706 | - log_func('Failed to parse {0} of content type {1}' | |
| 707 | - .format(subfile, content_type)) | |
| 711 | + log_func('Failed to parse {0} of content type {1} ("{2}")' | |
| 712 | + .format(subfile, content_type, str(exc))) | |
| 708 | 713 | # in any case: continue with next |
| 709 | 714 | |
| 710 | 715 | return u'\n'.join(dde_links) |
| ... | ... | @@ -886,9 +891,20 @@ def process_file(filepath, field_filter_mode=None): |
| 886 | 891 | if xls_parser.is_xls(filepath): |
| 887 | 892 | logger.debug('Process file as excel 2003 (xls)') |
| 888 | 893 | return process_xls(filepath) |
| 894 | + | |
| 895 | + # encrypted files also look like ole, even if office 2007+ (xml-based) | |
| 896 | + # so check for encryption, first | |
| 897 | + ole = olefile.OleFileIO(filepath, path_encoding=None) | |
| 898 | + oid = oleid.OleID(ole) | |
| 899 | + if oid.check_encrypted().value: | |
| 900 | + log.debug('is encrypted - raise error') | |
| 901 | + raise FileIsEncryptedError(filepath) | |
| 902 | + elif oid.check_powerpoint().value: | |
| 903 | + log.debug('is ppt - cannot have DDE') | |
| 904 | + return u'' | |
| 889 | 905 | else: |
| 890 | 906 | logger.debug('Process file as word 2003 (doc)') |
| 891 | - return process_doc(filepath) | |
| 907 | + return process_doc(ole) | |
| 892 | 908 | |
| 893 | 909 | with open(filepath, 'rb') as file_handle: |
| 894 | 910 | if file_handle.read(4) == RTF_START: | ... | ... |
oletools/oleid.py
| ... | ... | @@ -6,9 +6,8 @@ oleid is a script to analyze OLE files such as MS Office documents (e.g. Word, |
| 6 | 6 | Excel), to detect specific characteristics that could potentially indicate that |
| 7 | 7 | the file is suspicious or malicious, in terms of security (e.g. malware). |
| 8 | 8 | For example it can detect VBA macros, embedded Flash objects, fragmentation. |
| 9 | -The results can be displayed or returned as XML for further processing. | |
| 10 | - | |
| 11 | -Usage: oleid.py <file> | |
| 9 | +The results are displayed as an ASCII table (but could be returned or | |
| 10 | +printed in other formats like CSV, XML or JSON in future). | |
| 12 | 11 | |
| 13 | 12 | oleid project website: http://www.decalage.info/python/oleid |
| 14 | 13 | |
| ... | ... | @@ -21,8 +20,8 @@ http://www.decalage.info/python/oletools |
| 21 | 20 | # oleid is copyright (c) 2012-2018, Philippe Lagadec (http://www.decalage.info) |
| 22 | 21 | # All rights reserved. |
| 23 | 22 | # |
| 24 | -# Redistribution and use in source and binary forms, with or without modification, | |
| 25 | -# are permitted provided that the following conditions are met: | |
| 23 | +# Redistribution and use in source and binary forms, with or without | |
| 24 | +# modification, are permitted provided that the following conditions are met: | |
| 26 | 25 | # |
| 27 | 26 | # * Redistributions of source code must retain the above copyright notice, this |
| 28 | 27 | # list of conditions and the following disclaimer. |
| ... | ... | @@ -30,16 +29,17 @@ http://www.decalage.info/python/oletools |
| 30 | 29 | # this list of conditions and the following disclaimer in the documentation |
| 31 | 30 | # and/or other materials provided with the distribution. |
| 32 | 31 | # |
| 33 | -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND | |
| 34 | -# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | |
| 35 | -# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | |
| 36 | -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE | |
| 37 | -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
| 38 | -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |
| 39 | -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |
| 40 | -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |
| 41 | -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
| 42 | -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
| 32 | +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |
| 33 | +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
| 34 | +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
| 35 | +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE | |
| 36 | +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | |
| 37 | +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | |
| 38 | +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | |
| 39 | +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | |
| 40 | +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |
| 41 | +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | |
| 42 | +# POSSIBILITY OF SUCH DAMAGE. | |
| 43 | 43 | |
| 44 | 44 | # To improve Python 2+3 compatibility: |
| 45 | 45 | from __future__ import print_function |
| ... | ... | @@ -56,6 +56,8 @@ from __future__ import print_function |
| 56 | 56 | # 2017-04-26 PL: - fixed absolute imports (issue #141) |
| 57 | 57 | # 2017-09-01 SA: - detect OpenXML encryption |
| 58 | 58 | # 2018-09-11 v0.54 PL: - olefile is now a dependency |
| 59 | +# 2018-10-19 CH: - accept olefile as well as filename, return Indicators, | |
| 60 | +# improve encryption detection for ppt | |
| 59 | 61 | |
| 60 | 62 | __version__ = '0.54dev1' |
| 61 | 63 | |
| ... | ... | @@ -78,28 +80,27 @@ __version__ = '0.54dev1' |
| 78 | 80 | |
| 79 | 81 | #=== IMPORTS ================================================================= |
| 80 | 82 | |
| 81 | -import optparse, sys, os, re, zlib, struct | |
| 83 | +import argparse, sys, re, zlib, struct | |
| 84 | +from os.path import dirname, abspath | |
| 82 | 85 | |
| 83 | -# IMPORTANT: it should be possible to run oletools directly as scripts | |
| 84 | -# in any directory without installing them with pip or setup.py. | |
| 85 | -# In that case, relative imports are NOT usable. | |
| 86 | -# And to enable Python 2+3 compatibility, we need to use absolute imports, | |
| 87 | -# so we add the oletools parent folder to sys.path (absolute+normalized path): | |
| 88 | -_thismodule_dir = os.path.normpath(os.path.abspath(os.path.dirname(__file__))) | |
| 89 | -# print('_thismodule_dir = %r' % _thismodule_dir) | |
| 90 | -_parent_dir = os.path.normpath(os.path.join(_thismodule_dir, '..')) | |
| 91 | -# print('_parent_dir = %r' % _thirdparty_dir) | |
| 92 | -if not _parent_dir in sys.path: | |
| 93 | - sys.path.insert(0, _parent_dir) | |
| 86 | +# little hack to allow absolute imports even if oletools is not installed | |
| 87 | +# (required to run oletools directly as scripts in any directory). | |
| 88 | +try: | |
| 89 | + from oletools.thirdparty import prettytable | |
| 90 | +except ImportError: | |
| 91 | + PARENT_DIR = dirname(dirname(abspath(__file__))) | |
| 92 | + if PARENT_DIR not in sys.path: | |
| 93 | + sys.path.insert(0, PARENT_DIR) | |
| 94 | + del PARENT_DIR | |
| 95 | + from oletools.thirdparty import prettytable | |
| 94 | 96 | |
| 95 | 97 | import olefile |
| 96 | -from oletools.thirdparty.prettytable import prettytable | |
| 97 | 98 | |
| 98 | 99 | |
| 99 | 100 | |
| 100 | 101 | #=== FUNCTIONS =============================================================== |
| 101 | 102 | |
| 102 | -def detect_flash (data): | |
| 103 | +def detect_flash(data): | |
| 103 | 104 | """ |
| 104 | 105 | Detect Flash objects (SWF files) within a binary string of data |
| 105 | 106 | return a list of (start_index, length, compressed) tuples, or [] if nothing |
| ... | ... | @@ -141,7 +142,7 @@ def detect_flash (data): |
| 141 | 142 | compressed_data = swf[8:] |
| 142 | 143 | try: |
| 143 | 144 | zlib.decompress(compressed_data) |
| 144 | - except: | |
| 145 | + except Exception: | |
| 145 | 146 | continue |
| 146 | 147 | # else we don't check anything at this stage, we only assume it is a |
| 147 | 148 | # valid SWF. So there might be false positives for uncompressed SWF. |
| ... | ... | @@ -152,9 +153,15 @@ def detect_flash (data): |
| 152 | 153 | |
| 153 | 154 | #=== CLASSES ================================================================= |
| 154 | 155 | |
| 155 | -class Indicator (object): | |
| 156 | +class Indicator(object): | |
| 157 | + """ | |
| 158 | + Piece of information of an :py:class:`OleID` object. | |
| 159 | + | |
| 160 | + Contains an ID, value, type, name and description. No other functionality. | |
| 161 | + """ | |
| 156 | 162 | |
| 157 | - def __init__(self, _id, value=None, _type=bool, name=None, description=None): | |
| 163 | + def __init__(self, _id, value=None, _type=bool, name=None, | |
| 164 | + description=None): | |
| 158 | 165 | self.id = _id |
| 159 | 166 | self.value = value |
| 160 | 167 | self.type = _type |
| ... | ... | @@ -164,21 +171,55 @@ class Indicator (object): |
| 164 | 171 | self.description = description |
| 165 | 172 | |
| 166 | 173 | |
| 167 | -class OleID: | |
| 174 | +class OleID(object): | |
| 175 | + """ | |
| 176 | + Summary of information about an OLE file | |
| 168 | 177 | |
| 169 | - def __init__(self, filename): | |
| 170 | - self.filename = filename | |
| 178 | + Call :py:meth:`OleID.check` to gather all info on a given file or run one | |
| 179 | + of the `check_` functions to just get a specific piece of info. | |
| 180 | + """ | |
| 181 | + | |
| 182 | + def __init__(self, input_file): | |
| 183 | + """ | |
| 184 | + Create an OleID object | |
| 185 | + | |
| 186 | + This does not run any checks yet nor open the file. | |
| 187 | + | |
| 188 | + Can either give just a filename (as str), so OleID will check whether | |
| 189 | + that is a valid OLE file and create a :py:class:`olefile.OleFileIO` | |
| 190 | + object for it. Or you can give an already opened | |
| 191 | + :py:class:`olefile.OleFileIO` as argument to avoid re-opening (e.g. if | |
| 192 | + called from other oletools). | |
| 193 | + | |
| 194 | + If filename is given, only :py:meth:`OleID.check` opens the file. Other | |
| 195 | + functions will return None | |
| 196 | + """ | |
| 197 | + if isinstance(input_file, olefile.OleFileIO): | |
| 198 | + self.ole = input_file | |
| 199 | + self.filename = None | |
| 200 | + else: | |
| 201 | + self.filename = input_file | |
| 202 | + self.ole = None | |
| 171 | 203 | self.indicators = [] |
| 204 | + self.suminfo_data = None | |
| 172 | 205 | |
| 173 | 206 | def check(self): |
| 207 | + """ | |
| 208 | + Open file and run all checks on it. | |
| 209 | + | |
| 210 | + :returns: list of all :py:class:`Indicator`s created | |
| 211 | + """ | |
| 174 | 212 | # check if it is actually an OLE file: |
| 175 | 213 | oleformat = Indicator('ole_format', True, name='OLE format') |
| 176 | 214 | self.indicators.append(oleformat) |
| 177 | - if not olefile.isOleFile(self.filename): | |
| 215 | + if self.ole: | |
| 216 | + oleformat.value = True | |
| 217 | + elif not olefile.isOleFile(self.filename): | |
| 178 | 218 | oleformat.value = False |
| 179 | 219 | return self.indicators |
| 180 | - # parse file: | |
| 181 | - self.ole = olefile.OleFileIO(self.filename) | |
| 220 | + else: | |
| 221 | + # parse file: | |
| 222 | + self.ole = olefile.OleFileIO(self.filename) | |
| 182 | 223 | # checks: |
| 183 | 224 | self.check_properties() |
| 184 | 225 | self.check_encrypted() |
| ... | ... | @@ -186,143 +227,274 @@ class OleID: |
| 186 | 227 | self.check_excel() |
| 187 | 228 | self.check_powerpoint() |
| 188 | 229 | self.check_visio() |
| 189 | - self.check_ObjectPool() | |
| 230 | + self.check_object_pool() | |
| 190 | 231 | self.check_flash() |
| 191 | 232 | self.ole.close() |
| 192 | 233 | return self.indicators |
| 193 | 234 | |
| 194 | - def check_properties (self): | |
| 195 | - suminfo = Indicator('has_suminfo', False, name='Has SummaryInformation stream') | |
| 235 | + def check_properties(self): | |
| 236 | + """ | |
| 237 | + Read summary information required for other check_* functions | |
| 238 | + | |
| 239 | + :returns: 2 :py:class:`Indicator`s (for presence of summary info and | |
| 240 | + application name) or (None, None) if file was not opened | |
| 241 | + """ | |
| 242 | + suminfo = Indicator('has_suminfo', False, | |
| 243 | + name='Has SummaryInformation stream') | |
| 196 | 244 | self.indicators.append(suminfo) |
| 197 | - appname = Indicator('appname', 'unknown', _type=str, name='Application name') | |
| 245 | + appname = Indicator('appname', 'unknown', _type=str, | |
| 246 | + name='Application name') | |
| 198 | 247 | self.indicators.append(appname) |
| 199 | - self.suminfo = {} | |
| 200 | - # check stream SummaryInformation | |
| 248 | + if not self.ole: | |
| 249 | + return None, None | |
| 250 | + self.suminfo_data = {} | |
| 251 | + # check stream SummaryInformation (not present e.g. in encrypted ppt) | |
| 201 | 252 | if self.ole.exists("\x05SummaryInformation"): |
| 202 | 253 | suminfo.value = True |
| 203 | - self.suminfo = self.ole.getproperties("\x05SummaryInformation") | |
| 254 | + self.suminfo_data = self.ole.getproperties("\x05SummaryInformation") | |
| 204 | 255 | # check application name: |
| 205 | - appname.value = self.suminfo.get(0x12, 'unknown') | |
| 206 | - | |
| 207 | - def check_encrypted (self): | |
| 256 | + appname.value = self.suminfo_data.get(0x12, 'unknown') | |
| 257 | + return suminfo, appname | |
| 258 | + | |
| 259 | + def get_indicator(self, indicator_id): | |
| 260 | + """Helper function: returns an indicator if present (or None)""" | |
| 261 | + result = [indicator for indicator in self.indicators | |
| 262 | + if indicator.id == indicator_id] | |
| 263 | + if result: | |
| 264 | + return result[0] | |
| 265 | + else: | |
| 266 | + return None | |
| 267 | + | |
| 268 | + def check_encrypted(self): | |
| 269 | + """ | |
| 270 | + Check whether this file is encrypted. | |
| 271 | + | |
| 272 | + Might call check_properties. | |
| 273 | + | |
| 274 | + :returns: :py:class:`Indicator` for encryption or None if file was not | |
| 275 | + opened | |
| 276 | + """ | |
| 208 | 277 | # we keep the pointer to the indicator, can be modified by other checks: |
| 209 | - self.encrypted = Indicator('encrypted', False, name='Encrypted') | |
| 210 | - self.indicators.append(self.encrypted) | |
| 278 | + encrypted = Indicator('encrypted', False, name='Encrypted') | |
| 279 | + self.indicators.append(encrypted) | |
| 280 | + if not self.ole: | |
| 281 | + return None | |
| 211 | 282 | # check if bit 1 of security field = 1: |
| 212 | 283 | # (this field may be missing for Powerpoint2000, for example) |
| 213 | - if 0x13 in self.suminfo: | |
| 214 | - if self.suminfo[0x13] & 1: | |
| 215 | - self.encrypted.value = True | |
| 284 | + if self.suminfo_data is None: | |
| 285 | + self.check_properties() | |
| 286 | + if 0x13 in self.suminfo_data: | |
| 287 | + if self.suminfo_data[0x13] & 1: | |
| 288 | + encrypted.value = True | |
| 216 | 289 | # check if this is an OpenXML encrypted file |
| 217 | 290 | elif self.ole.exists('EncryptionInfo'): |
| 218 | - self.encrypted.value = True | |
| 219 | - | |
| 220 | - def check_word (self): | |
| 221 | - word = Indicator('word', False, name='Word Document', | |
| 222 | - description='Contains a WordDocument stream, very likely to be a Microsoft Word Document.') | |
| 291 | + encrypted.value = True | |
| 292 | + # or an encrypted ppt file | |
| 293 | + if self.ole.exists('EncryptedSummary') and \ | |
| 294 | + not self.ole.exists('SummaryInformation'): | |
| 295 | + encrypted.value = True | |
| 296 | + return encrypted | |
| 297 | + | |
| 298 | + def check_word(self): | |
| 299 | + """ | |
| 300 | + Check whether this file is a word document | |
| 301 | + | |
| 302 | + If this finds evidence of encryption, will correct/add encryption | |
| 303 | + indicator. | |
| 304 | + | |
| 305 | + :returns: 2 :py:class:`Indicator`s (for word and vba_macros) or | |
| 306 | + (None, None) if file was not opened | |
| 307 | + """ | |
| 308 | + word = Indicator( | |
| 309 | + 'word', False, name='Word Document', | |
| 310 | + description='Contains a WordDocument stream, very likely to be a ' | |
| 311 | + 'Microsoft Word Document.') | |
| 223 | 312 | self.indicators.append(word) |
| 224 | - self.macros = Indicator('vba_macros', False, name='VBA Macros') | |
| 225 | - self.indicators.append(self.macros) | |
| 313 | + macros = Indicator('vba_macros', False, name='VBA Macros') | |
| 314 | + self.indicators.append(macros) | |
| 315 | + if not self.ole: | |
| 316 | + return None, None | |
| 226 | 317 | if self.ole.exists('WordDocument'): |
| 227 | 318 | word.value = True |
| 228 | 319 | # check for Word-specific encryption flag: |
| 229 | - s = self.ole.openstream(["WordDocument"]) | |
| 230 | - # pass header 10 bytes | |
| 231 | - s.read(10) | |
| 232 | - # read flag structure: | |
| 233 | - temp16 = struct.unpack("H", s.read(2))[0] | |
| 234 | - fEncrypted = (temp16 & 0x0100) >> 8 | |
| 235 | - if fEncrypted: | |
| 236 | - self.encrypted.value = True | |
| 237 | - s.close() | |
| 320 | + stream = None | |
| 321 | + try: | |
| 322 | + stream = self.ole.openstream(["WordDocument"]) | |
| 323 | + # pass header 10 bytes | |
| 324 | + stream.read(10) | |
| 325 | + # read flag structure: | |
| 326 | + temp16 = struct.unpack("H", stream.read(2))[0] | |
| 327 | + f_encrypted = (temp16 & 0x0100) >> 8 | |
| 328 | + if f_encrypted: | |
| 329 | + # correct encrypted indicator if present or add one | |
| 330 | + encrypt_ind = self.get_indicator('encrypted') | |
| 331 | + if encrypt_ind: | |
| 332 | + encrypt_ind.value = True | |
| 333 | + else: | |
| 334 | + self.indicators.append('encrypted', True, name='Encrypted') | |
| 335 | + except Exception: | |
| 336 | + raise | |
| 337 | + finally: | |
| 338 | + if stream is not None: | |
| 339 | + stream.close() | |
| 238 | 340 | # check for VBA macros: |
| 239 | 341 | if self.ole.exists('Macros'): |
| 240 | - self.macros.value = True | |
| 342 | + macros.value = True | |
| 343 | + return word, macros | |
| 344 | + | |
| 345 | + def check_excel(self): | |
| 346 | + """ | |
| 347 | + Check whether this file is an excel workbook. | |
| 348 | + | |
| 349 | + If this finds macros, will add/correct macro indicator. | |
| 241 | 350 | |
| 242 | - def check_excel (self): | |
| 243 | - excel = Indicator('excel', False, name='Excel Workbook', | |
| 244 | - description='Contains a Workbook or Book stream, very likely to be a Microsoft Excel Workbook.') | |
| 351 | + see also: :py:func:`xls_parser.is_xls` | |
| 352 | + | |
| 353 | + :returns: :py:class:`Indicator` for excel or None if file was | |
| 354 | + not opened | |
| 355 | + """ | |
| 356 | + excel = Indicator( | |
| 357 | + 'excel', False, name='Excel Workbook', | |
| 358 | + description='Contains a Workbook or Book stream, very likely to be ' | |
| 359 | + 'a Microsoft Excel Workbook.') | |
| 245 | 360 | self.indicators.append(excel) |
| 361 | + if not self.ole: | |
| 362 | + return None | |
| 246 | 363 | #self.macros = Indicator('vba_macros', False, name='VBA Macros') |
| 247 | 364 | #self.indicators.append(self.macros) |
| 248 | 365 | if self.ole.exists('Workbook') or self.ole.exists('Book'): |
| 249 | 366 | excel.value = True |
| 250 | 367 | # check for VBA macros: |
| 251 | 368 | if self.ole.exists('_VBA_PROJECT_CUR'): |
| 252 | - self.macros.value = True | |
| 253 | - | |
| 254 | - def check_powerpoint (self): | |
| 255 | - ppt = Indicator('ppt', False, name='PowerPoint Presentation', | |
| 256 | - description='Contains a PowerPoint Document stream, very likely to be a Microsoft PowerPoint Presentation.') | |
| 369 | + # correct macro indicator if present or add one | |
| 370 | + macro_ind = self.get_indicator('vba_macros') | |
| 371 | + if macro_ind: | |
| 372 | + macro_ind.value = True | |
| 373 | + else: | |
| 374 | + self.indicators.append('vba_macros', True, | |
| 375 | + name='VBA Macros') | |
| 376 | + return excel | |
| 377 | + | |
| 378 | + def check_powerpoint(self): | |
| 379 | + """ | |
| 380 | + Check whether this file is a powerpoint presentation | |
| 381 | + | |
| 382 | + see also: :py:func:`ppt_record_parser.is_ppt` | |
| 383 | + | |
| 384 | + :returns: :py:class:`Indicator` for whether this is a powerpoint | |
| 385 | + presentation or not or None if file was not opened | |
| 386 | + """ | |
| 387 | + ppt = Indicator( | |
| 388 | + 'ppt', False, name='PowerPoint Presentation', | |
| 389 | + description='Contains a PowerPoint Document stream, very likely to ' | |
| 390 | + 'be a Microsoft PowerPoint Presentation.') | |
| 257 | 391 | self.indicators.append(ppt) |
| 392 | + if not self.ole: | |
| 393 | + return None | |
| 258 | 394 | if self.ole.exists('PowerPoint Document'): |
| 259 | 395 | ppt.value = True |
| 260 | - | |
| 261 | - def check_visio (self): | |
| 262 | - visio = Indicator('visio', False, name='Visio Drawing', | |
| 263 | - description='Contains a VisioDocument stream, very likely to be a Microsoft Visio Drawing.') | |
| 396 | + return ppt | |
| 397 | + | |
| 398 | + def check_visio(self): | |
| 399 | + """Check whether this file is a visio drawing""" | |
| 400 | + visio = Indicator( | |
| 401 | + 'visio', False, name='Visio Drawing', | |
| 402 | + description='Contains a VisioDocument stream, very likely to be a ' | |
| 403 | + 'Microsoft Visio Drawing.') | |
| 264 | 404 | self.indicators.append(visio) |
| 405 | + if not self.ole: | |
| 406 | + return None | |
| 265 | 407 | if self.ole.exists('VisioDocument'): |
| 266 | 408 | visio.value = True |
| 409 | + return visio | |
| 410 | + | |
| 411 | + def check_object_pool(self): | |
| 412 | + """ | |
| 413 | + Check whether this file contains an ObjectPool stream. | |
| 414 | + | |
| 415 | + Such a stream would be a strong indicator for embedded objects or files. | |
| 267 | 416 | |
| 268 | - def check_ObjectPool (self): | |
| 269 | - objpool = Indicator('ObjectPool', False, name='ObjectPool', | |
| 270 | - description='Contains an ObjectPool stream, very likely to contain embedded OLE objects or files.') | |
| 417 | + :returns: :py:class:`Indicator` for ObjectPool stream or None if file | |
| 418 | + was not opened | |
| 419 | + """ | |
| 420 | + objpool = Indicator( | |
| 421 | + 'ObjectPool', False, name='ObjectPool', | |
| 422 | + description='Contains an ObjectPool stream, very likely to contain ' | |
| 423 | + 'embedded OLE objects or files.') | |
| 271 | 424 | self.indicators.append(objpool) |
| 425 | + if not self.ole: | |
| 426 | + return None | |
| 272 | 427 | if self.ole.exists('ObjectPool'): |
| 273 | 428 | objpool.value = True |
| 274 | - | |
| 275 | - | |
| 276 | - def check_flash (self): | |
| 277 | - flash = Indicator('flash', 0, _type=int, name='Flash objects', | |
| 278 | - description='Number of embedded Flash objects (SWF files) detected in OLE streams. Not 100% accurate, there may be false positives.') | |
| 429 | + return objpool | |
| 430 | + | |
| 431 | + def check_flash(self): | |
| 432 | + """ | |
| 433 | + Check whether this file contains flash objects | |
| 434 | + | |
| 435 | + :returns: :py:class:`Indicator` for count of flash objects or None if | |
| 436 | + file was not opened | |
| 437 | + """ | |
| 438 | + flash = Indicator( | |
| 439 | + 'flash', 0, _type=int, name='Flash objects', | |
| 440 | + description='Number of embedded Flash objects (SWF files) detected ' | |
| 441 | + 'in OLE streams. Not 100% accurate, there may be false ' | |
| 442 | + 'positives.') | |
| 279 | 443 | self.indicators.append(flash) |
| 444 | + if not self.ole: | |
| 445 | + return None | |
| 280 | 446 | for stream in self.ole.listdir(): |
| 281 | 447 | data = self.ole.openstream(stream).read() |
| 282 | 448 | found = detect_flash(data) |
| 283 | 449 | # just add to the count of Flash objects: |
| 284 | 450 | flash.value += len(found) |
| 285 | 451 | #print stream, found |
| 452 | + return flash | |
| 286 | 453 | |
| 287 | 454 | |
| 288 | 455 | #=== MAIN ================================================================= |
| 289 | 456 | |
| 290 | 457 | def main(): |
| 458 | + """Called when running this file as script. Shows all info on input file.""" | |
| 291 | 459 | # print banner with version |
| 292 | - print ('oleid %s - http://decalage.info/oletools' % __version__) | |
| 293 | - print ('THIS IS WORK IN PROGRESS - Check updates regularly!') | |
| 294 | - print ('Please report any issue at https://github.com/decalage2/oletools/issues') | |
| 295 | - print ('') | |
| 460 | + print('oleid %s - http://decalage.info/oletools' % __version__) | |
| 461 | + print('THIS IS WORK IN PROGRESS - Check updates regularly!') | |
| 462 | + print('Please report any issue at ' | |
| 463 | + 'https://github.com/decalage2/oletools/issues') | |
| 464 | + print('') | |
| 296 | 465 | |
| 297 | - usage = 'usage: %prog [options] <file>' | |
| 298 | - parser = optparse.OptionParser(usage=__doc__ + '\n' + usage) | |
| 299 | -## parser.add_option('-o', '--ole', action='store_true', dest='ole', help='Parse an OLE file (e.g. Word, Excel) to look for SWF in each stream') | |
| 466 | + parser = argparse.ArgumentParser(description=__doc__) | |
| 467 | + parser.add_argument('input', type=str, nargs='*', metavar='FILE', | |
| 468 | + help='Name of files to process') | |
| 469 | + # parser.add_argument('-o', '--ole', action='store_true', dest='ole', | |
| 470 | + # help='Parse an OLE file (e.g. Word, Excel) to look for ' | |
| 471 | + # 'SWF in each stream') | |
| 300 | 472 | |
| 301 | - (options, args) = parser.parse_args() | |
| 473 | + args = parser.parse_args() | |
| 302 | 474 | |
| 303 | 475 | # Print help if no arguments are passed |
| 304 | - if len(args) == 0: | |
| 476 | + if len(args.input) == 0: | |
| 305 | 477 | parser.print_help() |
| 306 | 478 | return |
| 307 | 479 | |
| 308 | - for filename in args: | |
| 480 | + for filename in args.input: | |
| 309 | 481 | print('Filename:', filename) |
| 310 | 482 | oleid = OleID(filename) |
| 311 | 483 | indicators = oleid.check() |
| 312 | 484 | |
| 313 | 485 | #TODO: add description |
| 314 | 486 | #TODO: highlight suspicious indicators |
| 315 | - t = prettytable.PrettyTable(['Indicator', 'Value']) | |
| 316 | - t.align = 'l' | |
| 317 | - t.max_width = 39 | |
| 318 | - #t.border = False | |
| 487 | + table = prettytable.PrettyTable(['Indicator', 'Value']) | |
| 488 | + table.align = 'l' | |
| 489 | + table.max_width = 39 | |
| 490 | + table.border = False | |
| 319 | 491 | |
| 320 | 492 | for indicator in indicators: |
| 321 | 493 | #print '%s: %s' % (indicator.name, indicator.value) |
| 322 | - t.add_row((indicator.name, indicator.value)) | |
| 494 | + table.add_row((indicator.name, indicator.value)) | |
| 323 | 495 | |
| 324 | - print(t) | |
| 325 | - print ('') | |
| 496 | + print(table) | |
| 497 | + print('') | |
| 326 | 498 | |
| 327 | 499 | if __name__ == '__main__': |
| 328 | 500 | main() | ... | ... |
oletools/olevba.py
| ... | ... | @@ -14,6 +14,7 @@ Supported formats: |
| 14 | 14 | - Word 2003 XML (.xml) |
| 15 | 15 | - Word/Excel Single File Web Page / MHTML (.mht) |
| 16 | 16 | - Publisher (.pub) |
| 17 | +- raises an error if run with files encrypted using MS Crypto API RC4 | |
| 17 | 18 | |
| 18 | 19 | Author: Philippe Lagadec - http://www.decalage.info |
| 19 | 20 | License: BSD, see source code or documentation |
| ... | ... | @@ -208,6 +209,7 @@ from __future__ import print_function |
| 208 | 209 | # (issue #283) |
| 209 | 210 | # 2018-09-11 v0.54 PL: - olefile is now a dependency |
| 210 | 211 | # 2018-10-08 PL: - replace backspace before printing to console (issue #358) |
| 212 | +# 2018-10-25 CH: - detect encryption and raise error if detected | |
| 211 | 213 | |
| 212 | 214 | __version__ = '0.54dev2' |
| 213 | 215 | |
| ... | ... | @@ -309,6 +311,8 @@ from pyparsing import \ |
| 309 | 311 | from oletools import ppt_parser |
| 310 | 312 | from oletools import oleform |
| 311 | 313 | from oletools import rtfobj |
| 314 | +from oletools import oleid | |
| 315 | +from oletools.common.errors import FileIsEncryptedError | |
| 312 | 316 | |
| 313 | 317 | |
| 314 | 318 | # monkeypatch email to fix issue #32: |
| ... | ... | @@ -472,6 +476,7 @@ RETURN_OPEN_ERROR = 5 |
| 472 | 476 | RETURN_PARSE_ERROR = 6 |
| 473 | 477 | RETURN_SEVERAL_ERRS = 7 |
| 474 | 478 | RETURN_UNEXPECTED = 8 |
| 479 | +RETURN_ENCRYPTED = 9 | |
| 475 | 480 | |
| 476 | 481 | # MAC codepages (from http://stackoverflow.com/questions/1592925/decoding-mac-os-text-in-python) |
| 477 | 482 | MAC_CODEPAGES = { |
| ... | ... | @@ -2367,6 +2372,12 @@ class VBA_Parser(object): |
| 2367 | 2372 | # This looks like an OLE file |
| 2368 | 2373 | self.open_ole(_file) |
| 2369 | 2374 | |
| 2375 | + # check whether file is encrypted (need to do this before try ppt) | |
| 2376 | + log.debug('Check encryption of ole file') | |
| 2377 | + crypt_indicator = oleid.OleID(self.ole_file).check_encrypted() | |
| 2378 | + if crypt_indicator.value: | |
| 2379 | + raise FileIsEncryptedError(filename) | |
| 2380 | + | |
| 2370 | 2381 | # if this worked, try whether it is a ppt file (special ole file) |
| 2371 | 2382 | self.open_ppt() |
| 2372 | 2383 | if self.type is None and is_zipfile(_file): |
| ... | ... | @@ -3634,6 +3645,16 @@ def main(cmd_line_args=None): |
| 3634 | 3645 | % (filename, exc.orig_exc)) |
| 3635 | 3646 | return_code = RETURN_PARSE_ERROR if return_code == 0 \ |
| 3636 | 3647 | else RETURN_SEVERAL_ERRS |
| 3648 | + except FileIsEncryptedError as exc: | |
| 3649 | + if options.output_mode in ('triage', 'unspecified'): | |
| 3650 | + print('%-12s %s - File is encrypted' % ('!ERROR', filename)) | |
| 3651 | + elif options.output_mode == 'json': | |
| 3652 | + print_json(file=filename, type='error', | |
| 3653 | + error=type(exc).__name__, message=str(exc)) | |
| 3654 | + else: | |
| 3655 | + log.exception('File %s is encrypted!' % (filename)) | |
| 3656 | + return_code = RETURN_ENCRYPTED if return_code == 0 \ | |
| 3657 | + else RETURN_SEVERAL_ERRS | |
| 3637 | 3658 | # Here we do not close the vba_parser, because process_file may need it below. |
| 3638 | 3659 | |
| 3639 | 3660 | if options.output_mode == 'triage': | ... | ... |
oletools/olevba3.py
| ... | ... | @@ -16,6 +16,7 @@ Supported formats: |
| 16 | 16 | - Word 2003 XML (.xml) |
| 17 | 17 | - Word/Excel Single File Web Page / MHTML (.mht) |
| 18 | 18 | - Publisher (.pub) |
| 19 | +- raises an error if run with files encrypted using MS Crypto API RC4 | |
| 19 | 20 | |
| 20 | 21 | Author: Philippe Lagadec - http://www.decalage.info |
| 21 | 22 | License: BSD, see source code or documentation |
| ... | ... | @@ -207,6 +208,7 @@ from __future__ import print_function |
| 207 | 208 | # 2018-06-11 v0.53.1 MHW: - fixed #320: chr instead of unichr on python 3 |
| 208 | 209 | # 2018-06-12 MHW: - fixed #322: import reduce from functools |
| 209 | 210 | # 2018-09-11 v0.54 PL: - olefile is now a dependency |
| 211 | +# 2018-10-25 CH: - detect encryption and raise error if detected | |
| 210 | 212 | |
| 211 | 213 | __version__ = '0.54dev1' |
| 212 | 214 | |
| ... | ... | @@ -247,7 +249,6 @@ import os |
| 247 | 249 | import logging |
| 248 | 250 | import struct |
| 249 | 251 | from _io import StringIO,BytesIO |
| 250 | -from oletools import rtfobj | |
| 251 | 252 | import math |
| 252 | 253 | import zipfile |
| 253 | 254 | import re |
| ... | ... | @@ -298,6 +299,9 @@ from pyparsing import \ |
| 298 | 299 | alphanums, alphas, hexnums,nums, opAssoc, srange, \ |
| 299 | 300 | infixNotation, ParserElement |
| 300 | 301 | import oletools.ppt_parser as ppt_parser |
| 302 | +from oletools import rtfobj | |
| 303 | +from oletools import oleid | |
| 304 | +from oletools.common.errors import FileIsEncryptedError | |
| 301 | 305 | |
| 302 | 306 | # monkeypatch email to fix issue #32: |
| 303 | 307 | # allow header lines without ":" |
| ... | ... | @@ -479,6 +483,7 @@ RETURN_OPEN_ERROR = 5 |
| 479 | 483 | RETURN_PARSE_ERROR = 6 |
| 480 | 484 | RETURN_SEVERAL_ERRS = 7 |
| 481 | 485 | RETURN_UNEXPECTED = 8 |
| 486 | +RETURN_ENCRYPTED = 9 | |
| 482 | 487 | |
| 483 | 488 | # MAC codepages (from http://stackoverflow.com/questions/1592925/decoding-mac-os-text-in-python) |
| 484 | 489 | MAC_CODEPAGES = { |
| ... | ... | @@ -2360,6 +2365,12 @@ class VBA_Parser(object): |
| 2360 | 2365 | # This looks like an OLE file |
| 2361 | 2366 | self.open_ole(_file) |
| 2362 | 2367 | |
| 2368 | + # check whether file is encrypted (need to do this before try ppt) | |
| 2369 | + log.debug('Check encryption of ole file') | |
| 2370 | + crypt_indicator = oleid.OleID(self.ole_file).check_encrypted() | |
| 2371 | + if crypt_indicator.value: | |
| 2372 | + raise FileIsEncryptedError(filename) | |
| 2373 | + | |
| 2363 | 2374 | # if this worked, try whether it is a ppt file (special ole file) |
| 2364 | 2375 | self.open_ppt() |
| 2365 | 2376 | if self.type is None and is_zipfile(_file): |
| ... | ... | @@ -3594,6 +3605,18 @@ def main(cmd_line_args=None): |
| 3594 | 3605 | % (filename, exc.orig_exc)) |
| 3595 | 3606 | return_code = RETURN_PARSE_ERROR if return_code == 0 \ |
| 3596 | 3607 | else RETURN_SEVERAL_ERRS |
| 3608 | + except FileIsEncryptedError as exc: | |
| 3609 | + if options.output_mode in ('triage', 'unspecified'): | |
| 3610 | + print('%-12s %s - File is encrypted' % ('!ERROR', filename)) | |
| 3611 | + elif options.output_mode == 'json': | |
| 3612 | + print_json(file=filename, type='error', | |
| 3613 | + error=type(exc).__name__, message=str(exc)) | |
| 3614 | + else: | |
| 3615 | + log.exception('File %s is encrypted!' % (filename)) | |
| 3616 | + return_code = RETURN_ENCRYPTED if return_code == 0 \ | |
| 3617 | + else RETURN_SEVERAL_ERRS | |
| 3618 | + # Here we do not close the vba_parser, because process_file may need it below. | |
| 3619 | + | |
| 3597 | 3620 | finally: |
| 3598 | 3621 | if vba_parser is not None: |
| 3599 | 3622 | vba_parser.close() | ... | ... |
oletools/ooxml.py
| ... | ... | @@ -9,6 +9,8 @@ See also: Notes on Microsoft's implementation of ECMA-376: [MS-0E376] |
| 9 | 9 | |
| 10 | 10 | TODO: may have to tell apart single xml types: office2003 looks much different |
| 11 | 11 | than 2006+ --> DOCTYPE_*_XML2003 |
| 12 | +TODO: check what is duplicate here with oleid, maybe merge some day? | |
| 13 | +TODO: "xml2003" == "flatopc"? | |
| 12 | 14 | |
| 13 | 15 | .. codeauthor:: Intra2net AG <info@intra2net> |
| 14 | 16 | """ | ... | ... |
oletools/ppt_record_parser.py
| ... | ... | @@ -63,6 +63,7 @@ except ImportError: |
| 63 | 63 | sys.path.insert(0, PARENT_DIR) |
| 64 | 64 | del PARENT_DIR |
| 65 | 65 | from oletools import record_base |
| 66 | +from oletools.common.errors import FileIsEncryptedError | |
| 66 | 67 | |
| 67 | 68 | |
| 68 | 69 | # types of relevant records (there are much more than listed here) |
| ... | ... | @@ -147,13 +148,17 @@ def is_ppt(filename): |
| 147 | 148 | |
| 148 | 149 | Param filename can be anything that OleFileIO constructor accepts: name of |
| 149 | 150 | file or file data or data stream. |
| 151 | + | |
| 152 | + see also: oleid.OleID.check_powerpoint | |
| 150 | 153 | """ |
| 151 | 154 | have_current_user = False |
| 152 | 155 | have_user_edit = False |
| 153 | 156 | have_persist_dir = False |
| 154 | 157 | have_document_container = False |
| 158 | + ppt_file = None | |
| 155 | 159 | try: |
| 156 | - for stream in PptFile(filename).iter_streams(): | |
| 160 | + ppt_file = PptFile(filename) | |
| 161 | + for stream in ppt_file.iter_streams(): | |
| 157 | 162 | if stream.name == 'Current User': |
| 158 | 163 | for record in stream.iter_records(): |
| 159 | 164 | if isinstance(record, PptRecordCurrentUser): |
| ... | ... | @@ -176,6 +181,11 @@ def is_ppt(filename): |
| 176 | 181 | return True |
| 177 | 182 | else: # ignore other streams/storages since they are optional |
| 178 | 183 | continue |
| 184 | + except FileIsEncryptedError: | |
| 185 | + assert ppt_file is not None, \ | |
| 186 | + 'Encryption error should not be raised from just opening OLE file.' | |
| 187 | + # just rely on stream names, copied from oleid | |
| 188 | + return ppt_file.exists('PowerPoint Document') | |
| 179 | 189 | except Exception: |
| 180 | 190 | pass |
| 181 | 191 | return False | ... | ... |
oletools/record_base.py
| ... | ... | @@ -44,6 +44,7 @@ __version__ = '0.54dev1' |
| 44 | 44 | # TODO: |
| 45 | 45 | # - read DocumentSummaryInformation first to get more info about streams |
| 46 | 46 | # (maybe content type or so; identify streams that are never record-based) |
| 47 | +# Or use oleid to avoid same functionality in several files | |
| 47 | 48 | # - think about integrating this with olefile itself |
| 48 | 49 | |
| 49 | 50 | # ----------------------------------------------------------------------------- |
| ... | ... | @@ -62,6 +63,18 @@ import logging |
| 62 | 63 | |
| 63 | 64 | import olefile |
| 64 | 65 | |
| 66 | +try: | |
| 67 | + from oletools.common.errors import FileIsEncryptedError | |
| 68 | +except ImportError: | |
| 69 | + # little hack to allow absolute imports even if oletools is not installed. | |
| 70 | + PARENT_DIR = os.path.normpath(os.path.dirname(os.path.dirname( | |
| 71 | + os.path.abspath(__file__)))) | |
| 72 | + if PARENT_DIR not in sys.path: | |
| 73 | + sys.path.insert(0, PARENT_DIR) | |
| 74 | + del PARENT_DIR | |
| 75 | + from oletools.common.errors import FileIsEncryptedError | |
| 76 | +from oletools import oleid | |
| 77 | + | |
| 65 | 78 | |
| 66 | 79 | ############################################################################### |
| 67 | 80 | # Helpers |
| ... | ... | @@ -111,6 +124,12 @@ class OleRecordFile(olefile.OleFileIO): |
| 111 | 124 | Subclass of OleFileIO! |
| 112 | 125 | """ |
| 113 | 126 | |
| 127 | + def open(self, filename, *args, **kwargs): | |
| 128 | + """Call OleFileIO.open and record whether the file is encrypted.""" | |
| 129 | + #super(OleRecordFile, self).open(filename, *args, **kwargs) | |
| 130 | + OleFileIO.open(self, filename, *args, **kwargs) | |
| 131 | + self.is_encrypted = oleid.OleID(self).check_encrypted().value | |
| 132 | + | |
| 114 | 133 | @classmethod |
| 115 | 134 | def stream_class_for_name(cls, stream_name): |
| 116 | 135 | """ helper for iter_streams, must be overwritten in subclasses |
| ... | ... | @@ -142,7 +161,8 @@ class OleRecordFile(olefile.OleFileIO): |
| 142 | 161 | stream = clz(self._open(direntry.isectStart, direntry.size), |
| 143 | 162 | direntry.size, |
| 144 | 163 | None if is_orphan else direntry.name, |
| 145 | - direntry.entry_type) | |
| 164 | + direntry.entry_type, | |
| 165 | + self.is_encrypted) | |
| 146 | 166 | yield stream |
| 147 | 167 | stream.close() |
| 148 | 168 | |
| ... | ... | @@ -155,13 +175,14 @@ class OleRecordStream(object): |
| 155 | 175 | abstract base class |
| 156 | 176 | """ |
| 157 | 177 | |
| 158 | - def __init__(self, stream, size, name, stream_type): | |
| 178 | + def __init__(self, stream, size, name, stream_type, is_encrypted=False): | |
| 159 | 179 | self.stream = stream |
| 160 | 180 | self.size = size |
| 161 | 181 | self.name = name |
| 162 | 182 | if stream_type not in ENTRY_TYPE2STR: |
| 163 | 183 | raise ValueError('Unknown stream type: {0}'.format(stream_type)) |
| 164 | 184 | self.stream_type = stream_type |
| 185 | + self.is_encrypted = is_encrypted | |
| 165 | 186 | |
| 166 | 187 | def read_record_head(self): |
| 167 | 188 | """ read first few bytes of record to determine size and type |
| ... | ... | @@ -190,6 +211,9 @@ class OleRecordStream(object): |
| 190 | 211 | |
| 191 | 212 | Stream must be positioned at start of records (e.g. start of stream). |
| 192 | 213 | """ |
| 214 | + if self.is_encrypted: | |
| 215 | + raise FileIsEncryptedError() | |
| 216 | + | |
| 193 | 217 | while True: |
| 194 | 218 | # unpacking as in olevba._extract_vba |
| 195 | 219 | pos = self.stream.tell() |
| ... | ... | @@ -234,6 +258,8 @@ class OleSummaryInformationStream(OleRecordStream): |
| 234 | 258 | |
| 235 | 259 | Do nothing so far. OleFileIO reads quite some info from this. For more info |
| 236 | 260 | see [MS-OSHARED] 2.3.3 and [MS-OLEPS] 2.21 and references therein. |
| 261 | + | |
| 262 | + See also: info read in oleid.py. | |
| 237 | 263 | """ |
| 238 | 264 | def iter_records(self, fill_data=False): |
| 239 | 265 | """ yields nothing, stops at once """ | ... | ... |
oletools/xls_parser.py
| ... | ... | @@ -86,14 +86,16 @@ def is_xls(filename): |
| 86 | 86 | returns True if given file is an ole file and contains a Workbook stream |
| 87 | 87 | |
| 88 | 88 | todo: could further check that workbook stream starts with a globals |
| 89 | - substream | |
| 89 | + substream. | |
| 90 | + See also: oleid.OleID.check_excel | |
| 90 | 91 | """ |
| 91 | 92 | try: |
| 92 | 93 | for stream in XlsFile(filename).iter_streams(): |
| 93 | 94 | if isinstance(stream, WorkbookStream): |
| 94 | 95 | return True |
| 95 | 96 | except Exception: |
| 96 | - return False | |
| 97 | + pass | |
| 98 | + return False | |
| 97 | 99 | |
| 98 | 100 | |
| 99 | 101 | def read_unicode(data, start_idx, n_chars): |
| ... | ... | @@ -130,6 +132,8 @@ class XlsFile(record_base.OleRecordFile): |
| 130 | 132 | @classmethod |
| 131 | 133 | def stream_class_for_name(cls, stream_name): |
| 132 | 134 | """ helper for iter_streams """ |
| 135 | + if stream_name == 'Workbook': | |
| 136 | + return WorkbookStream | |
| 133 | 137 | return XlsStream |
| 134 | 138 | |
| 135 | 139 | ... | ... |
tests/msodde/test_basic.py
| ... | ... | @@ -11,6 +11,7 @@ from __future__ import print_function |
| 11 | 11 | import unittest |
| 12 | 12 | from oletools import msodde |
| 13 | 13 | from tests.test_utils import DATA_BASE_DIR as BASE_DIR |
| 14 | +import os | |
| 14 | 15 | from os.path import join |
| 15 | 16 | from traceback import print_exc |
| 16 | 17 | |
| ... | ... | @@ -55,6 +56,20 @@ class TestReturnCode(unittest.TestCase): |
| 55 | 56 | """ check that text file argument leads to non-zero exit status """ |
| 56 | 57 | self.do_test_validity(join(BASE_DIR, 'basic/text'), True) |
| 57 | 58 | |
| 59 | + def test_encrypted(self): | |
| 60 | + """ | |
| 61 | + check that encrypted files lead to non-zero exit status | |
| 62 | + | |
| 63 | + Currently, only the encryption applied by Office 2010 (CryptoApi RC4 | |
| 64 | + Encryption) is tested. | |
| 65 | + """ | |
| 66 | + CRYPT_DIR = join(BASE_DIR, 'encrypted') | |
| 67 | + ADD_ARGS = '', '-j', '-d', '-f', '-a' | |
| 68 | + for filename in os.listdir(CRYPT_DIR): | |
| 69 | + full_name = join(CRYPT_DIR, filename) | |
| 70 | + for args in ADD_ARGS: | |
| 71 | + self.do_test_validity(args + ' ' + full_name, True) | |
| 72 | + | |
| 58 | 73 | def do_test_validity(self, args, expect_error=False): |
| 59 | 74 | """ helper for test_valid_doc[x] """ |
| 60 | 75 | have_exception = False | ... | ... |
tests/oleid/test_basic.py
0 → 100644
| 1 | +""" | |
| 2 | +Test basic functionality of oleid | |
| 3 | + | |
| 4 | +Should work with python2 and python3! | |
| 5 | +""" | |
| 6 | + | |
| 7 | +import unittest | |
| 8 | +import os | |
| 9 | +from os.path import join, relpath, splitext | |
| 10 | +from oletools import oleid | |
| 11 | + | |
| 12 | +# Directory with test data, independent of current working directory | |
| 13 | +from tests.test_utils import DATA_BASE_DIR | |
| 14 | + | |
| 15 | + | |
| 16 | +class TestOleIDBasic(unittest.TestCase): | |
| 17 | + """Test basic functionality of OleID""" | |
| 18 | + | |
| 19 | + def test_all(self): | |
| 20 | + """Run all files in test-data through oleid and compare to known output""" | |
| 21 | + # this relies on order of indicators being constant, could relax that | |
| 22 | + # Also requires that files have the correct suffixes (no rtf in doc) | |
| 23 | + NON_OLE_SUFFIXES = ('.xml', '.csv', '.rtf', '') | |
| 24 | + NON_OLE_VALUES = (False, ) | |
| 25 | + WORD = b'Microsoft Office Word' | |
| 26 | + PPT = b'Microsoft Office PowerPoint' | |
| 27 | + EXCEL = b'Microsoft Excel' | |
| 28 | + CRYPT = (True, False, 'unknown', True, False, False, False, False, | |
| 29 | + False, False, 0) | |
| 30 | + OLE_VALUES = { | |
| 31 | + 'oleobj/sample_with_lnk_file.doc': (True, True, WORD, False, True, | |
| 32 | + False, False, False, False, | |
| 33 | + True, 0), | |
| 34 | + 'oleobj/embedded-simple-2007.xlsb': (False,), | |
| 35 | + 'oleobj/embedded-simple-2007.docm': (False,), | |
| 36 | + 'oleobj/embedded-simple-2007.xltx': (False,), | |
| 37 | + 'oleobj/embedded-simple-2007.xlam': (False,), | |
| 38 | + 'oleobj/embedded-simple-2007.dotm': (False,), | |
| 39 | + 'oleobj/sample_with_lnk_file.ppt': (True, True, PPT, False, False, | |
| 40 | + False, False, True, False, | |
| 41 | + False, 0), | |
| 42 | + 'oleobj/embedded-simple-2007.xlsx': (False,), | |
| 43 | + 'oleobj/embedded-simple-2007.xlsm': (False,), | |
| 44 | + 'oleobj/embedded-simple-2007.ppsx': (False,), | |
| 45 | + 'oleobj/embedded-simple-2007.pps': (True, True, PPT, False, False, | |
| 46 | + False, False, True, False, | |
| 47 | + False, 0), | |
| 48 | + 'oleobj/embedded-simple-2007.xla': (True, True, EXCEL, False, | |
| 49 | + False, False, True, False, | |
| 50 | + False, False, 0), | |
| 51 | + 'oleobj/sample_with_calc_embedded.doc': (True, True, WORD, False, | |
| 52 | + True, False, False, False, | |
| 53 | + False, True, 0), | |
| 54 | + 'oleobj/embedded-unicode-2007.docx': (False,), | |
| 55 | + 'oleobj/embedded-unicode.doc': (True, True, WORD, False, True, | |
| 56 | + False, False, False, False, True, | |
| 57 | + 0), | |
| 58 | + 'oleobj/embedded-simple-2007.doc': (True, True, WORD, False, True, | |
| 59 | + False, False, False, False, | |
| 60 | + True, 0), | |
| 61 | + 'oleobj/embedded-simple-2007.xls': (True, True, EXCEL, False, | |
| 62 | + False, False, True, False, | |
| 63 | + False, False, 0), | |
| 64 | + 'oleobj/embedded-simple-2007.dot': (True, True, WORD, False, True, | |
| 65 | + False, False, False, False, | |
| 66 | + True, 0), | |
| 67 | + 'oleobj/sample_with_lnk_to_calc.doc': (True, True, WORD, False, | |
| 68 | + True, False, False, False, | |
| 69 | + False, True, 0), | |
| 70 | + 'oleobj/embedded-simple-2007.ppt': (True, True, PPT, False, False, | |
| 71 | + False, False, True, False, | |
| 72 | + False, 0), | |
| 73 | + 'oleobj/sample_with_lnk_file.pps': (True, True, PPT, False, False, | |
| 74 | + False, False, True, False, | |
| 75 | + False, 0), | |
| 76 | + 'oleobj/embedded-simple-2007.pptx': (False,), | |
| 77 | + 'oleobj/embedded-simple-2007.ppsm': (False,), | |
| 78 | + 'oleobj/embedded-simple-2007.dotx': (False,), | |
| 79 | + 'oleobj/embedded-simple-2007.pptm': (False,), | |
| 80 | + 'oleobj/embedded-simple-2007.xlt': (True, True, EXCEL, False, | |
| 81 | + False, False, True, False, | |
| 82 | + False, False, 0), | |
| 83 | + 'oleobj/embedded-simple-2007.docx': (False,), | |
| 84 | + 'oleobj/embedded-simple-2007.potx': (False,), | |
| 85 | + 'oleobj/embedded-simple-2007.pot': (True, True, PPT, False, False, | |
| 86 | + False, False, True, False, | |
| 87 | + False, 0), | |
| 88 | + 'oleobj/embedded-simple-2007.xltm': (False,), | |
| 89 | + 'oleobj/embedded-simple-2007.potm': (False,), | |
| 90 | + 'encrypted/encrypted.xlsx': CRYPT, | |
| 91 | + 'encrypted/encrypted.docm': CRYPT, | |
| 92 | + 'encrypted/encrypted.docx': CRYPT, | |
| 93 | + 'encrypted/encrypted.pptm': CRYPT, | |
| 94 | + 'encrypted/encrypted.xlsb': CRYPT, | |
| 95 | + 'encrypted/encrypted.xls': (True, True, EXCEL, True, False, False, | |
| 96 | + True, False, False, False, 0), | |
| 97 | + 'encrypted/encrypted.ppt': (True, False, 'unknown', True, False, | |
| 98 | + False, False, True, False, False, 0), | |
| 99 | + 'encrypted/encrypted.pptx': CRYPT, | |
| 100 | + 'encrypted/encrypted.xlsm': CRYPT, | |
| 101 | + 'encrypted/encrypted.doc': (True, True, WORD, True, True, False, | |
| 102 | + False, False, False, False, 0), | |
| 103 | + 'msodde/harmless-clean.docm': (False,), | |
| 104 | + 'msodde/dde-in-csv.csv': (False,), | |
| 105 | + 'msodde/dde-test-from-office2013-utf_16le-korean.doc': | |
| 106 | + (True, True, WORD, False, True, False, False, False, False, | |
| 107 | + False, 0), | |
| 108 | + 'msodde/harmless-clean.doc': (True, True, WORD, False, True, False, | |
| 109 | + False, False, False, False, 0), | |
| 110 | + 'msodde/dde-test.docm': (False,), | |
| 111 | + 'msodde/dde-test.xlsb': (False,), | |
| 112 | + 'msodde/dde-test.xlsm': (False,), | |
| 113 | + 'msodde/dde-test.docx': (False,), | |
| 114 | + 'msodde/dde-test.xlsx': (False,), | |
| 115 | + 'msodde/dde-test-from-office2003.doc': (True, True, WORD, False, | |
| 116 | + True, False, False, False, | |
| 117 | + False, False, 0), | |
| 118 | + 'msodde/dde-test-from-office2016.doc': (True, True, WORD, False, | |
| 119 | + True, False, False, False, | |
| 120 | + False, False, 0), | |
| 121 | + 'msodde/harmless-clean.docx': (False,), | |
| 122 | + 'oleform/oleform-PR314.docm': (False,), | |
| 123 | + 'basic/encrypted.docx': CRYPT, | |
| 124 | + } | |
| 125 | + | |
| 126 | + indicator_names = [] | |
| 127 | + for base_dir, _, files in os.walk(DATA_BASE_DIR): | |
| 128 | + for filename in files: | |
| 129 | + full_path = join(base_dir, filename) | |
| 130 | + name = relpath(full_path, DATA_BASE_DIR) | |
| 131 | + values = tuple(indicator.value for indicator in | |
| 132 | + oleid.OleID(full_path).check()) | |
| 133 | + if len(indicator_names) < 2: # not initialized with ole yet | |
| 134 | + indicator_names = tuple(indicator.name for indicator in | |
| 135 | + oleid.OleID(full_path).check()) | |
| 136 | + suffix = splitext(filename)[1] | |
| 137 | + if suffix in NON_OLE_SUFFIXES: | |
| 138 | + self.assertEqual(values, NON_OLE_VALUES, | |
| 139 | + msg='For non-ole file {} expected {}, ' | |
| 140 | + 'not {}'.format(name, NON_OLE_VALUES, | |
| 141 | + values)) | |
| 142 | + continue | |
| 143 | + try: | |
| 144 | + self.assertEqual(values, OLE_VALUES[name], | |
| 145 | + msg='Wrong detail values for {}:\n' | |
| 146 | + ' Names {}\n Found {}\n Expect {}' | |
| 147 | + .format(name, indicator_names, values, | |
| 148 | + OLE_VALUES[name])) | |
| 149 | + except KeyError: | |
| 150 | + print('Should add oleid output for {} to {} ({})' | |
| 151 | + .format(name, __name__, values[3:])) | |
| 152 | + | |
| 153 | +# just in case somebody calls this file as a script | |
| 154 | +if __name__ == '__main__': | |
| 155 | + unittest.main() | ... | ... |
tests/olevba/__init__.py
0 → 100644
tests/olevba/test_basic.py
0 → 100644
| 1 | +""" | |
| 2 | +Test basic functionality of olevba[3] | |
| 3 | +""" | |
| 4 | + | |
| 5 | +import unittest | |
| 6 | +import sys | |
| 7 | +if sys.version_info.major <= 2: | |
| 8 | + from oletools import olevba | |
| 9 | +else: | |
| 10 | + from oletools import olevba3 as olevba | |
| 11 | +import os | |
| 12 | +from os.path import join | |
| 13 | + | |
| 14 | +# Directory with test data, independent of current working directory | |
| 15 | +from tests.test_utils import DATA_BASE_DIR | |
| 16 | + | |
| 17 | + | |
| 18 | +class TestOlevbaBasic(unittest.TestCase): | |
| 19 | + """Tests olevba basic functionality""" | |
| 20 | + | |
| 21 | + def test_crypt_return(self): | |
| 22 | + """ | |
| 23 | + Tests that encrypted files give a certain return code. | |
| 24 | + | |
| 25 | + Currently, only the encryption applied by Office 2010 (CryptoApi RC4 | |
| 26 | + Encryption) is tested. | |
| 27 | + """ | |
| 28 | + CRYPT_DIR = join(DATA_BASE_DIR, 'encrypted') | |
| 29 | + CRYPT_RETURN_CODE = 9 | |
| 30 | + ADD_ARGS = [], ['-d', ], ['-a', ], ['-j', ], ['-t', ] | |
| 31 | + for filename in os.listdir(CRYPT_DIR): | |
| 32 | + full_name = join(CRYPT_DIR, filename) | |
| 33 | + for args in ADD_ARGS: | |
| 34 | + try: | |
| 35 | + ret_code = olevba.main(args + [full_name, ]) | |
| 36 | + except SystemExit as se: | |
| 37 | + ret_code = se.code or 0 # se.code can be None | |
| 38 | + self.assertEqual(ret_code, CRYPT_RETURN_CODE, | |
| 39 | + msg='Wrong return code {} for args {}' | |
| 40 | + .format(ret_code, args + [filename, ])) | |
| 41 | + | |
| 42 | + | |
| 43 | +# just in case somebody calls this file as a script | |
| 44 | +if __name__ == '__main__': | |
| 45 | + unittest.main() | ... | ... |
tests/test-data/encrypted/encrypted.doc
0 → 100644
No preview for this file type
tests/test-data/encrypted/encrypted.docm
0 → 100644
No preview for this file type
tests/test-data/encrypted/encrypted.docx
0 → 100644
No preview for this file type
tests/test-data/encrypted/encrypted.ppt
0 → 100644
No preview for this file type
tests/test-data/encrypted/encrypted.pptm
0 → 100644
No preview for this file type
tests/test-data/encrypted/encrypted.pptx
0 → 100644
No preview for this file type
tests/test-data/encrypted/encrypted.xls
0 → 100644
No preview for this file type
tests/test-data/encrypted/encrypted.xlsb
0 → 100644
No preview for this file type
tests/test-data/encrypted/encrypted.xlsm
0 → 100644
No preview for this file type
tests/test-data/encrypted/encrypted.xlsx
0 → 100644
No preview for this file type