Commit e8b6dd46b5b6221d8ff6725862f3b14fa15f2128

Authored by Philippe Lagadec
Committed by GitHub
2 parents d1f2a554 72684467

Merge pull request #362 from christian-intra2net/encrypt-detect-and-raise

Encrypt detect and raise
oletools/common/errors.py 0 → 100644
  1 +"""
  2 +Errors used in several tools to avoid duplication
  3 +
  4 +.. codeauthor:: Intra2net AG <info@intra2net.com>
  5 +"""
  6 +
  7 +class FileIsEncryptedError(ValueError):
  8 + """Exception thrown if file is encrypted and cannot deal with it."""
  9 + # see also: same class in olevba[3] and record_base
  10 + def __init__(self, filename=None):
  11 + super(FileIsEncryptedError, self).__init__(
  12 + 'Office file {}is encrypted, not yet supported'
  13 + .format('' if filename is None else filename + ' '))
oletools/msodde.py
@@ -11,6 +11,7 @@ Supported formats: @@ -11,6 +11,7 @@ Supported formats:
11 - RTF 11 - RTF
12 - CSV (exported from / imported into Excel) 12 - CSV (exported from / imported into Excel)
13 - XML (exported from Word 2003, Word 2007+, Excel 2003, (Excel 2007+?) 13 - XML (exported from Word 2003, Word 2007+, Excel 2003, (Excel 2007+?)
  14 +- raises an error if run with files encrypted using MS Crypto API RC4
14 15
15 Author: Philippe Lagadec - http://www.decalage.info 16 Author: Philippe Lagadec - http://www.decalage.info
16 License: BSD, see source code or documentation 17 License: BSD, see source code or documentation
@@ -61,7 +62,9 @@ import olefile @@ -61,7 +62,9 @@ import olefile
61 from oletools import ooxml 62 from oletools import ooxml
62 from oletools import xls_parser 63 from oletools import xls_parser
63 from oletools import rtfobj 64 from oletools import rtfobj
  65 +from oletools import oleid
64 from oletools.common.log_helper import log_helper 66 from oletools.common.log_helper import log_helper
  67 +from oletools.common.errors import FileIsEncryptedError
65 68
66 # ----------------------------------------------------------------------------- 69 # -----------------------------------------------------------------------------
67 # CHANGELOG: 70 # CHANGELOG:
@@ -84,6 +87,7 @@ from oletools.common.log_helper import log_helper @@ -84,6 +87,7 @@ from oletools.common.log_helper import log_helper
84 # 2018-01-10 CH: - add single-xml files (Word 2003/2007+ / Excel 2003) 87 # 2018-01-10 CH: - add single-xml files (Word 2003/2007+ / Excel 2003)
85 # 2018-03-21 CH: - added detection for various CSV formulas (issue #259) 88 # 2018-03-21 CH: - added detection for various CSV formulas (issue #259)
86 # 2018-09-11 v0.54 PL: - olefile is now a dependency 89 # 2018-09-11 v0.54 PL: - olefile is now a dependency
  90 +# 2018-10-25 CH: - detect encryption and raise error if detected
87 91
88 __version__ = '0.54dev1' 92 __version__ = '0.54dev1'
89 93
@@ -438,17 +442,18 @@ def process_doc_stream(stream): @@ -438,17 +442,18 @@ def process_doc_stream(stream):
438 return result_parts 442 return result_parts
439 443
440 444
441 -def process_doc(filepath): 445 +def process_doc(ole):
442 """ 446 """
443 find dde links in word ole (.doc/.dot) file 447 find dde links in word ole (.doc/.dot) file
444 448
  449 + Checks whether file is ppt and returns empty immediately in that case
  450 + (ppt files cannot contain DDE-links to my knowledge)
  451 +
445 like process_xml, returns a concatenated unicode string of dde links or 452 like process_xml, returns a concatenated unicode string of dde links or
446 empty if none were found. dde-links will still begin with the dde[auto] key 453 empty if none were found. dde-links will still begin with the dde[auto] key
447 word (possibly after some whitespace) 454 word (possibly after some whitespace)
448 """ 455 """
449 logger.debug('process_doc') 456 logger.debug('process_doc')
450 - ole = olefile.OleFileIO(filepath, path_encoding=None)  
451 -  
452 links = [] 457 links = []
453 for sid, direntry in enumerate(ole.direntries): 458 for sid, direntry in enumerate(ole.direntries):
454 is_orphan = direntry is None 459 is_orphan = direntry is None
@@ -703,8 +708,8 @@ def process_xlsx(filepath): @@ -703,8 +708,8 @@ def process_xlsx(filepath):
703 log_func = logger.debug 708 log_func = logger.debug
704 else: # default 709 else: # default
705 log_func = logger.info 710 log_func = logger.info
706 - log_func('Failed to parse {0} of content type {1}'  
707 - .format(subfile, content_type)) 711 + log_func('Failed to parse {0} of content type {1} ("{2}")'
  712 + .format(subfile, content_type, str(exc)))
708 # in any case: continue with next 713 # in any case: continue with next
709 714
710 return u'\n'.join(dde_links) 715 return u'\n'.join(dde_links)
@@ -886,9 +891,20 @@ def process_file(filepath, field_filter_mode=None): @@ -886,9 +891,20 @@ def process_file(filepath, field_filter_mode=None):
886 if xls_parser.is_xls(filepath): 891 if xls_parser.is_xls(filepath):
887 logger.debug('Process file as excel 2003 (xls)') 892 logger.debug('Process file as excel 2003 (xls)')
888 return process_xls(filepath) 893 return process_xls(filepath)
  894 +
  895 + # encrypted files also look like ole, even if office 2007+ (xml-based)
  896 + # so check for encryption, first
  897 + ole = olefile.OleFileIO(filepath, path_encoding=None)
  898 + oid = oleid.OleID(ole)
  899 + if oid.check_encrypted().value:
  900 + log.debug('is encrypted - raise error')
  901 + raise FileIsEncryptedError(filepath)
  902 + elif oid.check_powerpoint().value:
  903 + log.debug('is ppt - cannot have DDE')
  904 + return u''
889 else: 905 else:
890 logger.debug('Process file as word 2003 (doc)') 906 logger.debug('Process file as word 2003 (doc)')
891 - return process_doc(filepath) 907 + return process_doc(ole)
892 908
893 with open(filepath, 'rb') as file_handle: 909 with open(filepath, 'rb') as file_handle:
894 if file_handle.read(4) == RTF_START: 910 if file_handle.read(4) == RTF_START:
oletools/oleid.py
@@ -6,9 +6,8 @@ oleid is a script to analyze OLE files such as MS Office documents (e.g. Word, @@ -6,9 +6,8 @@ oleid is a script to analyze OLE files such as MS Office documents (e.g. Word,
6 Excel), to detect specific characteristics that could potentially indicate that 6 Excel), to detect specific characteristics that could potentially indicate that
7 the file is suspicious or malicious, in terms of security (e.g. malware). 7 the file is suspicious or malicious, in terms of security (e.g. malware).
8 For example it can detect VBA macros, embedded Flash objects, fragmentation. 8 For example it can detect VBA macros, embedded Flash objects, fragmentation.
9 -The results can be displayed or returned as XML for further processing.  
10 -  
11 -Usage: oleid.py <file> 9 +The results are displayed as an ASCII table (but could be returned or printed in
  10 +other formats like CSV, XML or JSON in future).
12 11
13 oleid project website: http://www.decalage.info/python/oleid 12 oleid project website: http://www.decalage.info/python/oleid
14 13
@@ -21,8 +20,8 @@ http://www.decalage.info/python/oletools @@ -21,8 +20,8 @@ http://www.decalage.info/python/oletools
21 # oleid is copyright (c) 2012-2018, Philippe Lagadec (http://www.decalage.info) 20 # oleid is copyright (c) 2012-2018, Philippe Lagadec (http://www.decalage.info)
22 # All rights reserved. 21 # All rights reserved.
23 # 22 #
24 -# Redistribution and use in source and binary forms, with or without modification,  
25 -# are permitted provided that the following conditions are met: 23 +# Redistribution and use in source and binary forms, with or without
  24 +# modification, are permitted provided that the following conditions are met:
26 # 25 #
27 # * Redistributions of source code must retain the above copyright notice, this 26 # * Redistributions of source code must retain the above copyright notice, this
28 # list of conditions and the following disclaimer. 27 # list of conditions and the following disclaimer.
@@ -30,16 +29,17 @@ http://www.decalage.info/python/oletools @@ -30,16 +29,17 @@ http://www.decalage.info/python/oletools
30 # this list of conditions and the following disclaimer in the documentation 29 # this list of conditions and the following disclaimer in the documentation
31 # and/or other materials provided with the distribution. 30 # and/or other materials provided with the distribution.
32 # 31 #
33 -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND  
34 -# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED  
35 -# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE  
36 -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE  
37 -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL  
38 -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR  
39 -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER  
40 -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,  
41 -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE  
42 -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  33 +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  34 +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  35 +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
  36 +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  37 +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  38 +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  39 +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  40 +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  41 +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  42 +# POSSIBILITY OF SUCH DAMAGE.
43 43
44 # To improve Python 2+3 compatibility: 44 # To improve Python 2+3 compatibility:
45 from __future__ import print_function 45 from __future__ import print_function
@@ -56,6 +56,8 @@ from __future__ import print_function @@ -56,6 +56,8 @@ from __future__ import print_function
56 # 2017-04-26 PL: - fixed absolute imports (issue #141) 56 # 2017-04-26 PL: - fixed absolute imports (issue #141)
57 # 2017-09-01 SA: - detect OpenXML encryption 57 # 2017-09-01 SA: - detect OpenXML encryption
58 # 2018-09-11 v0.54 PL: - olefile is now a dependency 58 # 2018-09-11 v0.54 PL: - olefile is now a dependency
  59 +# 2018-10-19 CH: - accept olefile as well as filename, return Indicators,
  60 +# improve encryption detection for ppt
59 61
60 __version__ = '0.54dev1' 62 __version__ = '0.54dev1'
61 63
@@ -78,28 +80,27 @@ __version__ = '0.54dev1' @@ -78,28 +80,27 @@ __version__ = '0.54dev1'
78 80
79 #=== IMPORTS ================================================================= 81 #=== IMPORTS =================================================================
80 82
81 -import optparse, sys, os, re, zlib, struct 83 +import argparse, sys, re, zlib, struct
  84 +from os.path import dirname, abspath
82 85
83 -# IMPORTANT: it should be possible to run oletools directly as scripts  
84 -# in any directory without installing them with pip or setup.py.  
85 -# In that case, relative imports are NOT usable.  
86 -# And to enable Python 2+3 compatibility, we need to use absolute imports,  
87 -# so we add the oletools parent folder to sys.path (absolute+normalized path):  
88 -_thismodule_dir = os.path.normpath(os.path.abspath(os.path.dirname(__file__)))  
89 -# print('_thismodule_dir = %r' % _thismodule_dir)  
90 -_parent_dir = os.path.normpath(os.path.join(_thismodule_dir, '..'))  
91 -# print('_parent_dir = %r' % _thirdparty_dir)  
92 -if not _parent_dir in sys.path:  
93 - sys.path.insert(0, _parent_dir) 86 +# little hack to allow absolute imports even if oletools is not installed
  87 +# (required to run oletools directly as scripts in any directory).
  88 +try:
  89 + from oletools.thirdparty import prettytable
  90 +except ImportError:
  91 + PARENT_DIR = dirname(dirname(abspath(__file__)))
  92 + if PARENT_DIR not in sys.path:
  93 + sys.path.insert(0, PARENT_DIR)
  94 + del PARENT_DIR
  95 + from oletools.thirdparty import prettytable
94 96
95 import olefile 97 import olefile
96 -from oletools.thirdparty.prettytable import prettytable  
97 98
98 99
99 100
100 #=== FUNCTIONS =============================================================== 101 #=== FUNCTIONS ===============================================================
101 102
102 -def detect_flash (data): 103 +def detect_flash(data):
103 """ 104 """
104 Detect Flash objects (SWF files) within a binary string of data 105 Detect Flash objects (SWF files) within a binary string of data
105 return a list of (start_index, length, compressed) tuples, or [] if nothing 106 return a list of (start_index, length, compressed) tuples, or [] if nothing
@@ -141,7 +142,7 @@ def detect_flash (data): @@ -141,7 +142,7 @@ def detect_flash (data):
141 compressed_data = swf[8:] 142 compressed_data = swf[8:]
142 try: 143 try:
143 zlib.decompress(compressed_data) 144 zlib.decompress(compressed_data)
144 - except: 145 + except Exception:
145 continue 146 continue
146 # else we don't check anything at this stage, we only assume it is a 147 # else we don't check anything at this stage, we only assume it is a
147 # valid SWF. So there might be false positives for uncompressed SWF. 148 # valid SWF. So there might be false positives for uncompressed SWF.
@@ -152,9 +153,15 @@ def detect_flash (data): @@ -152,9 +153,15 @@ def detect_flash (data):
152 153
153 #=== CLASSES ================================================================= 154 #=== CLASSES =================================================================
154 155
155 -class Indicator (object): 156 +class Indicator(object):
  157 + """
  158 + Piece of information of an :py:class:`OleID` object.
  159 +
  160 + Contains an ID, value, type, name and description. No other functionality.
  161 + """
156 162
157 - def __init__(self, _id, value=None, _type=bool, name=None, description=None): 163 + def __init__(self, _id, value=None, _type=bool, name=None,
  164 + description=None):
158 self.id = _id 165 self.id = _id
159 self.value = value 166 self.value = value
160 self.type = _type 167 self.type = _type
@@ -164,21 +171,55 @@ class Indicator (object): @@ -164,21 +171,55 @@ class Indicator (object):
164 self.description = description 171 self.description = description
165 172
166 173
167 -class OleID: 174 +class OleID(object):
  175 + """
  176 + Summary of information about an OLE file
168 177
169 - def __init__(self, filename):  
170 - self.filename = filename 178 + Call :py:meth:`OleID.check` to gather all info on a given file or run one
  179 + of the `check_` functions to just get a specific piece of info.
  180 + """
  181 +
  182 + def __init__(self, input_file):
  183 + """
  184 + Create an OleID object
  185 +
  186 + This does not run any checks yet nor open the file.
  187 +
  188 + Can either give just a filename (as str), so OleID will check whether
  189 + that is a valid OLE file and create a :py:class:`olefile.OleFileIO`
  190 + object for it. Or you can give an already opened
  191 + :py:class:`olefile.OleFileIO` as argument to avoid re-opening (e.g. if
  192 + called from other oletools).
  193 +
  194 + If filename is given, only :py:meth:`OleID.check` opens the file. Other
  195 + functions will return None
  196 + """
  197 + if isinstance(input_file, olefile.OleFileIO):
  198 + self.ole = input_file
  199 + self.filename = None
  200 + else:
  201 + self.filename = input_file
  202 + self.ole = None
171 self.indicators = [] 203 self.indicators = []
  204 + self.suminfo_data = None
172 205
173 def check(self): 206 def check(self):
  207 + """
  208 + Open file and run all checks on it.
  209 +
  210 + :returns: list of all :py:class:`Indicator`s created
  211 + """
174 # check if it is actually an OLE file: 212 # check if it is actually an OLE file:
175 oleformat = Indicator('ole_format', True, name='OLE format') 213 oleformat = Indicator('ole_format', True, name='OLE format')
176 self.indicators.append(oleformat) 214 self.indicators.append(oleformat)
177 - if not olefile.isOleFile(self.filename): 215 + if self.ole:
  216 + oleformat.value = True
  217 + elif not olefile.isOleFile(self.filename):
178 oleformat.value = False 218 oleformat.value = False
179 return self.indicators 219 return self.indicators
180 - # parse file:  
181 - self.ole = olefile.OleFileIO(self.filename) 220 + else:
  221 + # parse file:
  222 + self.ole = olefile.OleFileIO(self.filename)
182 # checks: 223 # checks:
183 self.check_properties() 224 self.check_properties()
184 self.check_encrypted() 225 self.check_encrypted()
@@ -186,143 +227,274 @@ class OleID: @@ -186,143 +227,274 @@ class OleID:
186 self.check_excel() 227 self.check_excel()
187 self.check_powerpoint() 228 self.check_powerpoint()
188 self.check_visio() 229 self.check_visio()
189 - self.check_ObjectPool() 230 + self.check_object_pool()
190 self.check_flash() 231 self.check_flash()
191 self.ole.close() 232 self.ole.close()
192 return self.indicators 233 return self.indicators
193 234
194 - def check_properties (self):  
195 - suminfo = Indicator('has_suminfo', False, name='Has SummaryInformation stream') 235 + def check_properties(self):
  236 + """
  237 + Read summary information required for other check_* functions
  238 +
  239 + :returns: 2 :py:class:`Indicator`s (for presence of summary info and
  240 + application name) or (None, None) if file was not opened
  241 + """
  242 + suminfo = Indicator('has_suminfo', False,
  243 + name='Has SummaryInformation stream')
196 self.indicators.append(suminfo) 244 self.indicators.append(suminfo)
197 - appname = Indicator('appname', 'unknown', _type=str, name='Application name') 245 + appname = Indicator('appname', 'unknown', _type=str,
  246 + name='Application name')
198 self.indicators.append(appname) 247 self.indicators.append(appname)
199 - self.suminfo = {}  
200 - # check stream SummaryInformation 248 + if not self.ole:
  249 + return None, None
  250 + self.suminfo_data = {}
  251 + # check stream SummaryInformation (not present e.g. in encrypted ppt)
201 if self.ole.exists("\x05SummaryInformation"): 252 if self.ole.exists("\x05SummaryInformation"):
202 suminfo.value = True 253 suminfo.value = True
203 - self.suminfo = self.ole.getproperties("\x05SummaryInformation") 254 + self.suminfo_data = self.ole.getproperties("\x05SummaryInformation")
204 # check application name: 255 # check application name:
205 - appname.value = self.suminfo.get(0x12, 'unknown')  
206 -  
207 - def check_encrypted (self): 256 + appname.value = self.suminfo_data.get(0x12, 'unknown')
  257 + return suminfo, appname
  258 +
  259 + def get_indicator(self, indicator_id):
  260 + """Helper function: returns an indicator if present (or None)"""
  261 + result = [indicator for indicator in self.indicators
  262 + if indicator.id == indicator_id]
  263 + if result:
  264 + return result[0]
  265 + else:
  266 + return None
  267 +
  268 + def check_encrypted(self):
  269 + """
  270 + Check whether this file is encrypted.
  271 +
  272 + Might call check_properties.
  273 +
  274 + :returns: :py:class:`Indicator` for encryption or None if file was not
  275 + opened
  276 + """
208 # we keep the pointer to the indicator, can be modified by other checks: 277 # we keep the pointer to the indicator, can be modified by other checks:
209 - self.encrypted = Indicator('encrypted', False, name='Encrypted')  
210 - self.indicators.append(self.encrypted) 278 + encrypted = Indicator('encrypted', False, name='Encrypted')
  279 + self.indicators.append(encrypted)
  280 + if not self.ole:
  281 + return None
211 # check if bit 1 of security field = 1: 282 # check if bit 1 of security field = 1:
212 # (this field may be missing for Powerpoint2000, for example) 283 # (this field may be missing for Powerpoint2000, for example)
213 - if 0x13 in self.suminfo:  
214 - if self.suminfo[0x13] & 1:  
215 - self.encrypted.value = True 284 + if self.suminfo_data is None:
  285 + self.check_properties()
  286 + if 0x13 in self.suminfo_data:
  287 + if self.suminfo_data[0x13] & 1:
  288 + encrypted.value = True
216 # check if this is an OpenXML encrypted file 289 # check if this is an OpenXML encrypted file
217 elif self.ole.exists('EncryptionInfo'): 290 elif self.ole.exists('EncryptionInfo'):
218 - self.encrypted.value = True  
219 -  
220 - def check_word (self):  
221 - word = Indicator('word', False, name='Word Document',  
222 - description='Contains a WordDocument stream, very likely to be a Microsoft Word Document.') 291 + encrypted.value = True
  292 + # or an encrypted ppt file
  293 + if self.ole.exists('EncryptedSummary') and \
  294 + not self.ole.exists('SummaryInformation'):
  295 + encrypted.value = True
  296 + return encrypted
  297 +
  298 + def check_word(self):
  299 + """
  300 + Check whether this file is a word document
  301 +
  302 + If this finds evidence of encryption, will correct/add encryption
  303 + indicator.
  304 +
  305 + :returns: 2 :py:class:`Indicator`s (for word and vba_macro) or
  306 + (None, None) if file was not opened
  307 + """
  308 + word = Indicator(
  309 + 'word', False, name='Word Document',
  310 + description='Contains a WordDocument stream, very likely to be a '
  311 + 'Microsoft Word Document.')
223 self.indicators.append(word) 312 self.indicators.append(word)
224 - self.macros = Indicator('vba_macros', False, name='VBA Macros')  
225 - self.indicators.append(self.macros) 313 + macros = Indicator('vba_macros', False, name='VBA Macros')
  314 + self.indicators.append(macros)
  315 + if not self.ole:
  316 + return None, None
226 if self.ole.exists('WordDocument'): 317 if self.ole.exists('WordDocument'):
227 word.value = True 318 word.value = True
228 # check for Word-specific encryption flag: 319 # check for Word-specific encryption flag:
229 - s = self.ole.openstream(["WordDocument"])  
230 - # pass header 10 bytes  
231 - s.read(10)  
232 - # read flag structure:  
233 - temp16 = struct.unpack("H", s.read(2))[0]  
234 - fEncrypted = (temp16 & 0x0100) >> 8  
235 - if fEncrypted:  
236 - self.encrypted.value = True  
237 - s.close() 320 + stream = None
  321 + try:
  322 + stream = self.ole.openstream(["WordDocument"])
  323 + # pass header 10 bytes
  324 + stream.read(10)
  325 + # read flag structure:
  326 + temp16 = struct.unpack("H", stream.read(2))[0]
  327 + f_encrypted = (temp16 & 0x0100) >> 8
  328 + if f_encrypted:
  329 + # correct encrypted indicator if present or add one
  330 + encrypt_ind = self.get_indicator('encrypted')
  331 + if encrypt_ind:
  332 + encrypt_ind.value = True
  333 + else:
  334 + self.indicators.append('encrypted', True, name='Encrypted')
  335 + except Exception:
  336 + raise
  337 + finally:
  338 + if stream is not None:
  339 + stream.close()
238 # check for VBA macros: 340 # check for VBA macros:
239 if self.ole.exists('Macros'): 341 if self.ole.exists('Macros'):
240 - self.macros.value = True 342 + macros.value = True
  343 + return word, macros
  344 +
  345 + def check_excel(self):
  346 + """
  347 + Check whether this file is an excel workbook.
  348 +
  349 + If this finds macros, will add/correct macro indicator.
241 350
242 - def check_excel (self):  
243 - excel = Indicator('excel', False, name='Excel Workbook',  
244 - description='Contains a Workbook or Book stream, very likely to be a Microsoft Excel Workbook.') 351 + see also: :py:func:`xls_parser.is_xls`
  352 +
  353 + :returns: :py:class:`Indicator` for excel or None if file was
  354 + not opened
  355 + """
  356 + excel = Indicator(
  357 + 'excel', False, name='Excel Workbook',
  358 + description='Contains a Workbook or Book stream, very likely to be '
  359 + 'a Microsoft Excel Workbook.')
245 self.indicators.append(excel) 360 self.indicators.append(excel)
  361 + if not self.ole:
  362 + return None
246 #self.macros = Indicator('vba_macros', False, name='VBA Macros') 363 #self.macros = Indicator('vba_macros', False, name='VBA Macros')
247 #self.indicators.append(self.macros) 364 #self.indicators.append(self.macros)
248 if self.ole.exists('Workbook') or self.ole.exists('Book'): 365 if self.ole.exists('Workbook') or self.ole.exists('Book'):
249 excel.value = True 366 excel.value = True
250 # check for VBA macros: 367 # check for VBA macros:
251 if self.ole.exists('_VBA_PROJECT_CUR'): 368 if self.ole.exists('_VBA_PROJECT_CUR'):
252 - self.macros.value = True  
253 -  
254 - def check_powerpoint (self):  
255 - ppt = Indicator('ppt', False, name='PowerPoint Presentation',  
256 - description='Contains a PowerPoint Document stream, very likely to be a Microsoft PowerPoint Presentation.') 369 + # correct macro indicator if present or add one
  370 + macro_ind = self.get_indicator('vba_macros')
  371 + if macro_ind:
  372 + macro_ind.value = True
  373 + else:
  374 + self.indicators.append('vba_macros', True,
  375 + name='VBA Macros')
  376 + return excel
  377 +
  378 + def check_powerpoint(self):
  379 + """
  380 + Check whether this file is a powerpoint presentation
  381 +
  382 + see also: :py:func:`ppt_record_parser.is_ppt`
  383 +
  384 + :returns: :py:class:`Indicator` for whether this is a powerpoint
  385 + presentation or not or None if file was not opened
  386 + """
  387 + ppt = Indicator(
  388 + 'ppt', False, name='PowerPoint Presentation',
  389 + description='Contains a PowerPoint Document stream, very likely to '
  390 + 'be a Microsoft PowerPoint Presentation.')
257 self.indicators.append(ppt) 391 self.indicators.append(ppt)
  392 + if not self.ole:
  393 + return None
258 if self.ole.exists('PowerPoint Document'): 394 if self.ole.exists('PowerPoint Document'):
259 ppt.value = True 395 ppt.value = True
260 -  
261 - def check_visio (self):  
262 - visio = Indicator('visio', False, name='Visio Drawing',  
263 - description='Contains a VisioDocument stream, very likely to be a Microsoft Visio Drawing.') 396 + return ppt
  397 +
  398 + def check_visio(self):
  399 + """Check whether this file is a visio drawing"""
  400 + visio = Indicator(
  401 + 'visio', False, name='Visio Drawing',
  402 + description='Contains a VisioDocument stream, very likely to be a '
  403 + 'Microsoft Visio Drawing.')
264 self.indicators.append(visio) 404 self.indicators.append(visio)
  405 + if not self.ole:
  406 + return None
265 if self.ole.exists('VisioDocument'): 407 if self.ole.exists('VisioDocument'):
266 visio.value = True 408 visio.value = True
  409 + return visio
  410 +
  411 + def check_object_pool(self):
  412 + """
  413 + Check whether this file contains an ObjectPool stream.
  414 +
  415 + Such a stream would be a strong indicator for embedded objects or files.
267 416
268 - def check_ObjectPool (self):  
269 - objpool = Indicator('ObjectPool', False, name='ObjectPool',  
270 - description='Contains an ObjectPool stream, very likely to contain embedded OLE objects or files.') 417 + :returns: :py:class:`Indicator` for ObjectPool stream or None if file
  418 + was not opened
  419 + """
  420 + objpool = Indicator(
  421 + 'ObjectPool', False, name='ObjectPool',
  422 + description='Contains an ObjectPool stream, very likely to contain '
  423 + 'embedded OLE objects or files.')
271 self.indicators.append(objpool) 424 self.indicators.append(objpool)
  425 + if not self.ole:
  426 + return None
272 if self.ole.exists('ObjectPool'): 427 if self.ole.exists('ObjectPool'):
273 objpool.value = True 428 objpool.value = True
274 -  
275 -  
276 - def check_flash (self):  
277 - flash = Indicator('flash', 0, _type=int, name='Flash objects',  
278 - description='Number of embedded Flash objects (SWF files) detected in OLE streams. Not 100% accurate, there may be false positives.') 429 + return objpool
  430 +
  431 + def check_flash(self):
  432 + """
  433 + Check whether this file contains flash objects
  434 +
  435 + :returns: :py:class:`Indicator` for count of flash objects or None if
  436 + file was not opened
  437 + """
  438 + flash = Indicator(
  439 + 'flash', 0, _type=int, name='Flash objects',
  440 + description='Number of embedded Flash objects (SWF files) detected '
  441 + 'in OLE streams. Not 100% accurate, there may be false '
  442 + 'positives.')
279 self.indicators.append(flash) 443 self.indicators.append(flash)
  444 + if not self.ole:
  445 + return None
280 for stream in self.ole.listdir(): 446 for stream in self.ole.listdir():
281 data = self.ole.openstream(stream).read() 447 data = self.ole.openstream(stream).read()
282 found = detect_flash(data) 448 found = detect_flash(data)
283 # just add to the count of Flash objects: 449 # just add to the count of Flash objects:
284 flash.value += len(found) 450 flash.value += len(found)
285 #print stream, found 451 #print stream, found
  452 + return flash
286 453
287 454
288 #=== MAIN ================================================================= 455 #=== MAIN =================================================================
289 456
290 def main(): 457 def main():
  458 + """Called when running this file as script. Shows all info on input file."""
291 # print banner with version 459 # print banner with version
292 - print ('oleid %s - http://decalage.info/oletools' % __version__)  
293 - print ('THIS IS WORK IN PROGRESS - Check updates regularly!')  
294 - print ('Please report any issue at https://github.com/decalage2/oletools/issues')  
295 - print ('') 460 + print('oleid %s - http://decalage.info/oletools' % __version__)
  461 + print('THIS IS WORK IN PROGRESS - Check updates regularly!')
  462 + print('Please report any issue at '
  463 + 'https://github.com/decalage2/oletools/issues')
  464 + print('')
296 465
297 - usage = 'usage: %prog [options] <file>'  
298 - parser = optparse.OptionParser(usage=__doc__ + '\n' + usage)  
299 -## parser.add_option('-o', '--ole', action='store_true', dest='ole', help='Parse an OLE file (e.g. Word, Excel) to look for SWF in each stream') 466 + parser = argparse.ArgumentParser(description=__doc__)
  467 + parser.add_argument('input', type=str, nargs='*', metavar='FILE',
  468 + help='Name of files to process')
  469 + # parser.add_argument('-o', '--ole', action='store_true', dest='ole',
  470 + # help='Parse an OLE file (e.g. Word, Excel) to look for '
  471 + # 'SWF in each stream')
300 472
301 - (options, args) = parser.parse_args() 473 + args = parser.parse_args()
302 474
303 # Print help if no arguments are passed 475 # Print help if no arguments are passed
304 - if len(args) == 0: 476 + if len(args.input) == 0:
305 parser.print_help() 477 parser.print_help()
306 return 478 return
307 479
308 - for filename in args: 480 + for filename in args.input:
309 print('Filename:', filename) 481 print('Filename:', filename)
310 oleid = OleID(filename) 482 oleid = OleID(filename)
311 indicators = oleid.check() 483 indicators = oleid.check()
312 484
313 #TODO: add description 485 #TODO: add description
314 #TODO: highlight suspicious indicators 486 #TODO: highlight suspicious indicators
315 - t = prettytable.PrettyTable(['Indicator', 'Value'])  
316 - t.align = 'l'  
317 - t.max_width = 39  
318 - #t.border = False 487 + table = prettytable.PrettyTable(['Indicator', 'Value'])
  488 + table.align = 'l'
  489 + table.max_width = 39
  490 + table.border = False
319 491
320 for indicator in indicators: 492 for indicator in indicators:
321 #print '%s: %s' % (indicator.name, indicator.value) 493 #print '%s: %s' % (indicator.name, indicator.value)
322 - t.add_row((indicator.name, indicator.value)) 494 + table.add_row((indicator.name, indicator.value))
323 495
324 - print(t)  
325 - print ('') 496 + print(table)
  497 + print('')
326 498
327 if __name__ == '__main__': 499 if __name__ == '__main__':
328 main() 500 main()
oletools/olevba.py
@@ -14,6 +14,7 @@ Supported formats: @@ -14,6 +14,7 @@ Supported formats:
14 - Word 2003 XML (.xml) 14 - Word 2003 XML (.xml)
15 - Word/Excel Single File Web Page / MHTML (.mht) 15 - Word/Excel Single File Web Page / MHTML (.mht)
16 - Publisher (.pub) 16 - Publisher (.pub)
  17 +- raises an error if run with files encrypted using MS Crypto API RC4
17 18
18 Author: Philippe Lagadec - http://www.decalage.info 19 Author: Philippe Lagadec - http://www.decalage.info
19 License: BSD, see source code or documentation 20 License: BSD, see source code or documentation
@@ -208,6 +209,7 @@ from __future__ import print_function @@ -208,6 +209,7 @@ from __future__ import print_function
208 # (issue #283) 209 # (issue #283)
209 # 2018-09-11 v0.54 PL: - olefile is now a dependency 210 # 2018-09-11 v0.54 PL: - olefile is now a dependency
210 # 2018-10-08 PL: - replace backspace before printing to console (issue #358) 211 # 2018-10-08 PL: - replace backspace before printing to console (issue #358)
  212 +# 2018-10-25 CH: - detect encryption and raise error if detected
211 213
212 __version__ = '0.54dev2' 214 __version__ = '0.54dev2'
213 215
@@ -309,6 +311,8 @@ from pyparsing import \ @@ -309,6 +311,8 @@ from pyparsing import \
309 from oletools import ppt_parser 311 from oletools import ppt_parser
310 from oletools import oleform 312 from oletools import oleform
311 from oletools import rtfobj 313 from oletools import rtfobj
  314 +from oletools import oleid
  315 +from oletools.common.errors import FileIsEncryptedError
312 316
313 317
314 # monkeypatch email to fix issue #32: 318 # monkeypatch email to fix issue #32:
@@ -472,6 +476,7 @@ RETURN_OPEN_ERROR = 5 @@ -472,6 +476,7 @@ RETURN_OPEN_ERROR = 5
472 RETURN_PARSE_ERROR = 6 476 RETURN_PARSE_ERROR = 6
473 RETURN_SEVERAL_ERRS = 7 477 RETURN_SEVERAL_ERRS = 7
474 RETURN_UNEXPECTED = 8 478 RETURN_UNEXPECTED = 8
  479 +RETURN_ENCRYPTED = 9
475 480
476 # MAC codepages (from http://stackoverflow.com/questions/1592925/decoding-mac-os-text-in-python) 481 # MAC codepages (from http://stackoverflow.com/questions/1592925/decoding-mac-os-text-in-python)
477 MAC_CODEPAGES = { 482 MAC_CODEPAGES = {
@@ -2367,6 +2372,12 @@ class VBA_Parser(object): @@ -2367,6 +2372,12 @@ class VBA_Parser(object):
2367 # This looks like an OLE file 2372 # This looks like an OLE file
2368 self.open_ole(_file) 2373 self.open_ole(_file)
2369 2374
  2375 + # check whether file is encrypted (need to do this before try ppt)
  2376 + log.debug('Check encryption of ole file')
  2377 + crypt_indicator = oleid.OleID(self.ole_file).check_encrypted()
  2378 + if crypt_indicator.value:
  2379 + raise FileIsEncryptedError(filename)
  2380 +
2370 # if this worked, try whether it is a ppt file (special ole file) 2381 # if this worked, try whether it is a ppt file (special ole file)
2371 self.open_ppt() 2382 self.open_ppt()
2372 if self.type is None and is_zipfile(_file): 2383 if self.type is None and is_zipfile(_file):
@@ -3634,6 +3645,16 @@ def main(cmd_line_args=None): @@ -3634,6 +3645,16 @@ def main(cmd_line_args=None):
3634 % (filename, exc.orig_exc)) 3645 % (filename, exc.orig_exc))
3635 return_code = RETURN_PARSE_ERROR if return_code == 0 \ 3646 return_code = RETURN_PARSE_ERROR if return_code == 0 \
3636 else RETURN_SEVERAL_ERRS 3647 else RETURN_SEVERAL_ERRS
  3648 + except FileIsEncryptedError as exc:
  3649 + if options.output_mode in ('triage', 'unspecified'):
  3650 + print('%-12s %s - File is encrypted' % ('!ERROR', filename))
  3651 + elif options.output_mode == 'json':
  3652 + print_json(file=filename, type='error',
  3653 + error=type(exc).__name__, message=str(exc))
  3654 + else:
  3655 + log.exception('File %s is encrypted!' % (filename))
  3656 + return_code = RETURN_ENCRYPTED if return_code == 0 \
  3657 + else RETURN_SEVERAL_ERRS
3637 # Here we do not close the vba_parser, because process_file may need it below. 3658 # Here we do not close the vba_parser, because process_file may need it below.
3638 3659
3639 if options.output_mode == 'triage': 3660 if options.output_mode == 'triage':
oletools/olevba3.py
@@ -16,6 +16,7 @@ Supported formats: @@ -16,6 +16,7 @@ Supported formats:
16 - Word 2003 XML (.xml) 16 - Word 2003 XML (.xml)
17 - Word/Excel Single File Web Page / MHTML (.mht) 17 - Word/Excel Single File Web Page / MHTML (.mht)
18 - Publisher (.pub) 18 - Publisher (.pub)
  19 +- raises an error if run with files encrypted using MS Crypto API RC4
19 20
20 Author: Philippe Lagadec - http://www.decalage.info 21 Author: Philippe Lagadec - http://www.decalage.info
21 License: BSD, see source code or documentation 22 License: BSD, see source code or documentation
@@ -207,6 +208,7 @@ from __future__ import print_function @@ -207,6 +208,7 @@ from __future__ import print_function
207 # 2018-06-11 v0.53.1 MHW: - fixed #320: chr instead of unichr on python 3 208 # 2018-06-11 v0.53.1 MHW: - fixed #320: chr instead of unichr on python 3
208 # 2018-06-12 MHW: - fixed #322: import reduce from functools 209 # 2018-06-12 MHW: - fixed #322: import reduce from functools
209 # 2018-09-11 v0.54 PL: - olefile is now a dependency 210 # 2018-09-11 v0.54 PL: - olefile is now a dependency
  211 +# 2018-10-25 CH: - detect encryption and raise error if detected
210 212
211 __version__ = '0.54dev1' 213 __version__ = '0.54dev1'
212 214
@@ -247,7 +249,6 @@ import os @@ -247,7 +249,6 @@ import os
247 import logging 249 import logging
248 import struct 250 import struct
249 from _io import StringIO,BytesIO 251 from _io import StringIO,BytesIO
250 -from oletools import rtfobj  
251 import math 252 import math
252 import zipfile 253 import zipfile
253 import re 254 import re
@@ -298,6 +299,9 @@ from pyparsing import \ @@ -298,6 +299,9 @@ from pyparsing import \
298 alphanums, alphas, hexnums,nums, opAssoc, srange, \ 299 alphanums, alphas, hexnums,nums, opAssoc, srange, \
299 infixNotation, ParserElement 300 infixNotation, ParserElement
300 import oletools.ppt_parser as ppt_parser 301 import oletools.ppt_parser as ppt_parser
  302 +from oletools import rtfobj
  303 +from oletools import oleid
  304 +from oletools.common.errors import FileIsEncryptedError
301 305
302 # monkeypatch email to fix issue #32: 306 # monkeypatch email to fix issue #32:
303 # allow header lines without ":" 307 # allow header lines without ":"
@@ -479,6 +483,7 @@ RETURN_OPEN_ERROR = 5 @@ -479,6 +483,7 @@ RETURN_OPEN_ERROR = 5
479 RETURN_PARSE_ERROR = 6 483 RETURN_PARSE_ERROR = 6
480 RETURN_SEVERAL_ERRS = 7 484 RETURN_SEVERAL_ERRS = 7
481 RETURN_UNEXPECTED = 8 485 RETURN_UNEXPECTED = 8
  486 +RETURN_ENCRYPTED = 9
482 487
483 # MAC codepages (from http://stackoverflow.com/questions/1592925/decoding-mac-os-text-in-python) 488 # MAC codepages (from http://stackoverflow.com/questions/1592925/decoding-mac-os-text-in-python)
484 MAC_CODEPAGES = { 489 MAC_CODEPAGES = {
@@ -2360,6 +2365,12 @@ class VBA_Parser(object): @@ -2360,6 +2365,12 @@ class VBA_Parser(object):
2360 # This looks like an OLE file 2365 # This looks like an OLE file
2361 self.open_ole(_file) 2366 self.open_ole(_file)
2362 2367
  2368 + # check whether file is encrypted (need to do this before try ppt)
  2369 + log.debug('Check encryption of ole file')
  2370 + crypt_indicator = oleid.OleID(self.ole_file).check_encrypted()
  2371 + if crypt_indicator.value:
  2372 + raise FileIsEncryptedError(filename)
  2373 +
2363 # if this worked, try whether it is a ppt file (special ole file) 2374 # if this worked, try whether it is a ppt file (special ole file)
2364 self.open_ppt() 2375 self.open_ppt()
2365 if self.type is None and is_zipfile(_file): 2376 if self.type is None and is_zipfile(_file):
@@ -3594,6 +3605,18 @@ def main(cmd_line_args=None): @@ -3594,6 +3605,18 @@ def main(cmd_line_args=None):
3594 % (filename, exc.orig_exc)) 3605 % (filename, exc.orig_exc))
3595 return_code = RETURN_PARSE_ERROR if return_code == 0 \ 3606 return_code = RETURN_PARSE_ERROR if return_code == 0 \
3596 else RETURN_SEVERAL_ERRS 3607 else RETURN_SEVERAL_ERRS
  3608 + except FileIsEncryptedError as exc:
  3609 + if options.output_mode in ('triage', 'unspecified'):
  3610 + print('%-12s %s - File is encrypted' % ('!ERROR', filename))
  3611 + elif options.output_mode == 'json':
  3612 + print_json(file=filename, type='error',
  3613 + error=type(exc).__name__, message=str(exc))
  3614 + else:
  3615 + log.exception('File %s is encrypted!' % (filename))
  3616 + return_code = RETURN_ENCRYPTED if return_code == 0 \
  3617 + else RETURN_SEVERAL_ERRS
  3618 + # Here we do not close the vba_parser, because process_file may need it below.
  3619 +
3597 finally: 3620 finally:
3598 if vba_parser is not None: 3621 if vba_parser is not None:
3599 vba_parser.close() 3622 vba_parser.close()
oletools/ooxml.py
@@ -9,6 +9,8 @@ See also: Notes on Microsoft's implementation of ECMA-376: [MS-OE376] @@ -9,6 +9,8 @@ See also: Notes on Microsoft's implementation of ECMA-376: [MS-OE376]
9 9
10 TODO: may have to tell apart single xml types: office2003 looks much different 10 TODO: may have to tell apart single xml types: office2003 looks much different
11 than 2006+ --> DOCTYPE_*_XML2003 11 than 2006+ --> DOCTYPE_*_XML2003
  12 +TODO: check what is duplicate here with oleid, maybe merge some day?
  13 +TODO: "xml2003" == "flatopc"?
12 14
13 .. codeauthor:: Intra2net AG <info@intra2net.com> 15 .. codeauthor:: Intra2net AG <info@intra2net.com>
14 """ 16 """
oletools/ppt_record_parser.py
@@ -63,6 +63,7 @@ except ImportError: @@ -63,6 +63,7 @@ except ImportError:
63 sys.path.insert(0, PARENT_DIR) 63 sys.path.insert(0, PARENT_DIR)
64 del PARENT_DIR 64 del PARENT_DIR
65 from oletools import record_base 65 from oletools import record_base
  66 +from oletools.common.errors import FileIsEncryptedError
66 67
67 68
68 # types of relevant records (there are much more than listed here) 69 # types of relevant records (there are much more than listed here)
@@ -147,13 +148,17 @@ def is_ppt(filename): @@ -147,13 +148,17 @@ def is_ppt(filename):
147 148
148 Param filename can be anything that OleFileIO constructor accepts: name of 149 Param filename can be anything that OleFileIO constructor accepts: name of
149 file or file data or data stream. 150 file or file data or data stream.
  151 +
  152 + see also: oleid.OleID.check_powerpoint
150 """ 153 """
151 have_current_user = False 154 have_current_user = False
152 have_user_edit = False 155 have_user_edit = False
153 have_persist_dir = False 156 have_persist_dir = False
154 have_document_container = False 157 have_document_container = False
  158 + ppt_file = None
155 try: 159 try:
156 - for stream in PptFile(filename).iter_streams(): 160 + ppt_file = PptFile(filename)
  161 + for stream in ppt_file.iter_streams():
157 if stream.name == 'Current User': 162 if stream.name == 'Current User':
158 for record in stream.iter_records(): 163 for record in stream.iter_records():
159 if isinstance(record, PptRecordCurrentUser): 164 if isinstance(record, PptRecordCurrentUser):
@@ -176,6 +181,11 @@ def is_ppt(filename): @@ -176,6 +181,11 @@ def is_ppt(filename):
176 return True 181 return True
177 else: # ignore other streams/storages since they are optional 182 else: # ignore other streams/storages since they are optional
178 continue 183 continue
  184 + except FileIsEncryptedError:
  185 + assert ppt_file is not None, \
  186 + 'Encryption error should not be raised from just opening OLE file.'
  187 + # just rely on stream names, copied from oleid
  188 + return ppt_file.exists('PowerPoint Document')
179 except Exception: 189 except Exception:
180 pass 190 pass
181 return False 191 return False
oletools/record_base.py
@@ -44,6 +44,7 @@ __version__ = '0.54dev1' @@ -44,6 +44,7 @@ __version__ = '0.54dev1'
44 # TODO: 44 # TODO:
45 # - read DocumentSummaryInformation first to get more info about streams 45 # - read DocumentSummaryInformation first to get more info about streams
46 # (maybe content type or so; identify streams that are never record-based) 46 # (maybe content type or so; identify streams that are never record-based)
  47 +# Or use oleid to avoid same functionality in several files
47 # - think about integrating this with olefile itself 48 # - think about integrating this with olefile itself
48 49
49 # ----------------------------------------------------------------------------- 50 # -----------------------------------------------------------------------------
@@ -62,6 +63,18 @@ import logging @@ -62,6 +63,18 @@ import logging
62 63
63 import olefile 64 import olefile
64 65
  66 +try:
  67 + from oletools.common.errors import FileIsEncryptedError
  68 +except ImportError:
  69 + # little hack to allow absolute imports even if oletools is not installed.
  70 + PARENT_DIR = os.path.normpath(os.path.dirname(os.path.dirname(
  71 + os.path.abspath(__file__))))
  72 + if PARENT_DIR not in sys.path:
  73 + sys.path.insert(0, PARENT_DIR)
  74 + del PARENT_DIR
  75 + from oletools.common.errors import FileIsEncryptedError
  76 +from oletools import oleid
  77 +
65 78
66 ############################################################################### 79 ###############################################################################
67 # Helpers 80 # Helpers
@@ -111,6 +124,12 @@ class OleRecordFile(olefile.OleFileIO): @@ -111,6 +124,12 @@ class OleRecordFile(olefile.OleFileIO):
111 Subclass of OleFileIO! 124 Subclass of OleFileIO!
112 """ 125 """
113 126
  127 + def open(self, filename, *args, **kwargs):
  128 + """Call OleFileIO.open, raise error if is encrypted."""
  129 + #super(OleRecordFile, self).open(filename, *args, **kwargs)
  130 + OleFileIO.open(self, filename, *args, **kwargs)
  131 + self.is_encrypted = oleid.OleID(self).check_encrypted().value
  132 +
114 @classmethod 133 @classmethod
115 def stream_class_for_name(cls, stream_name): 134 def stream_class_for_name(cls, stream_name):
116 """ helper for iter_streams, must be overwritten in subclasses 135 """ helper for iter_streams, must be overwritten in subclasses
@@ -142,7 +161,8 @@ class OleRecordFile(olefile.OleFileIO): @@ -142,7 +161,8 @@ class OleRecordFile(olefile.OleFileIO):
142 stream = clz(self._open(direntry.isectStart, direntry.size), 161 stream = clz(self._open(direntry.isectStart, direntry.size),
143 direntry.size, 162 direntry.size,
144 None if is_orphan else direntry.name, 163 None if is_orphan else direntry.name,
145 - direntry.entry_type) 164 + direntry.entry_type,
  165 + self.is_encrypted)
146 yield stream 166 yield stream
147 stream.close() 167 stream.close()
148 168
@@ -155,13 +175,14 @@ class OleRecordStream(object): @@ -155,13 +175,14 @@ class OleRecordStream(object):
155 abstract base class 175 abstract base class
156 """ 176 """
157 177
158 - def __init__(self, stream, size, name, stream_type): 178 + def __init__(self, stream, size, name, stream_type, is_encrypted=False):
159 self.stream = stream 179 self.stream = stream
160 self.size = size 180 self.size = size
161 self.name = name 181 self.name = name
162 if stream_type not in ENTRY_TYPE2STR: 182 if stream_type not in ENTRY_TYPE2STR:
163 raise ValueError('Unknown stream type: {0}'.format(stream_type)) 183 raise ValueError('Unknown stream type: {0}'.format(stream_type))
164 self.stream_type = stream_type 184 self.stream_type = stream_type
  185 + self.is_encrypted = is_encrypted
165 186
166 def read_record_head(self): 187 def read_record_head(self):
167 """ read first few bytes of record to determine size and type 188 """ read first few bytes of record to determine size and type
@@ -190,6 +211,9 @@ class OleRecordStream(object): @@ -190,6 +211,9 @@ class OleRecordStream(object):
190 211
191 Stream must be positioned at start of records (e.g. start of stream). 212 Stream must be positioned at start of records (e.g. start of stream).
192 """ 213 """
  214 + if self.is_encrypted:
  215 + raise FileIsEncryptedError()
  216 +
193 while True: 217 while True:
194 # unpacking as in olevba._extract_vba 218 # unpacking as in olevba._extract_vba
195 pos = self.stream.tell() 219 pos = self.stream.tell()
@@ -234,6 +258,8 @@ class OleSummaryInformationStream(OleRecordStream): @@ -234,6 +258,8 @@ class OleSummaryInformationStream(OleRecordStream):
234 258
235 Do nothing so far. OleFileIO reads quite some info from this. For more info 259 Do nothing so far. OleFileIO reads quite some info from this. For more info
236 see [MS-OSHARED] 2.3.3 and [MS-OLEPS] 2.21 and references therein. 260 see [MS-OSHARED] 2.3.3 and [MS-OLEPS] 2.21 and references therein.
  261 +
  262 + See also: info read in oleid.py.
237 """ 263 """
238 def iter_records(self, fill_data=False): 264 def iter_records(self, fill_data=False):
239 """ yields nothing, stops at once """ 265 """ yields nothing, stops at once """
oletools/xls_parser.py
@@ -86,14 +86,16 @@ def is_xls(filename): @@ -86,14 +86,16 @@ def is_xls(filename):
86 returns True if given file is an ole file and contains a Workbook stream 86 returns True if given file is an ole file and contains a Workbook stream
87 87
88 todo: could further check that workbook stream starts with a globals 88 todo: could further check that workbook stream starts with a globals
89 - substream 89 + substream.
  90 + See also: oleid.OleID.check_excel
90 """ 91 """
91 try: 92 try:
92 for stream in XlsFile(filename).iter_streams(): 93 for stream in XlsFile(filename).iter_streams():
93 if isinstance(stream, WorkbookStream): 94 if isinstance(stream, WorkbookStream):
94 return True 95 return True
95 except Exception: 96 except Exception:
96 - return False 97 + pass
  98 + return False
97 99
98 100
99 def read_unicode(data, start_idx, n_chars): 101 def read_unicode(data, start_idx, n_chars):
@@ -130,6 +132,8 @@ class XlsFile(record_base.OleRecordFile): @@ -130,6 +132,8 @@ class XlsFile(record_base.OleRecordFile):
130 @classmethod 132 @classmethod
131 def stream_class_for_name(cls, stream_name): 133 def stream_class_for_name(cls, stream_name):
132 """ helper for iter_streams """ 134 """ helper for iter_streams """
  135 + if stream_name == 'Workbook':
  136 + return WorkbookStream
133 return XlsStream 137 return XlsStream
134 138
135 139
tests/msodde/test_basic.py
@@ -11,6 +11,7 @@ from __future__ import print_function @@ -11,6 +11,7 @@ from __future__ import print_function
11 import unittest 11 import unittest
12 from oletools import msodde 12 from oletools import msodde
13 from tests.test_utils import DATA_BASE_DIR as BASE_DIR 13 from tests.test_utils import DATA_BASE_DIR as BASE_DIR
  14 +import os
14 from os.path import join 15 from os.path import join
15 from traceback import print_exc 16 from traceback import print_exc
16 17
@@ -55,6 +56,20 @@ class TestReturnCode(unittest.TestCase): @@ -55,6 +56,20 @@ class TestReturnCode(unittest.TestCase):
55 """ check that text file argument leads to non-zero exit status """ 56 """ check that text file argument leads to non-zero exit status """
56 self.do_test_validity(join(BASE_DIR, 'basic/text'), True) 57 self.do_test_validity(join(BASE_DIR, 'basic/text'), True)
57 58
  59 + def test_encrypted(self):
  60 + """
  61 + check that encrypted files lead to non-zero exit status
  62 +
  63 + Currently, only the encryption applied by Office 2010 (CryptoApi RC4
  64 + Encryption) is tested.
  65 + """
  66 + CRYPT_DIR = join(BASE_DIR, 'encrypted')
  67 + ADD_ARGS = '', '-j', '-d', '-f', '-a'
  68 + for filename in os.listdir(CRYPT_DIR):
  69 + full_name = join(CRYPT_DIR, filename)
  70 + for args in ADD_ARGS:
  71 + self.do_test_validity(args + ' ' + full_name, True)
  72 +
58 def do_test_validity(self, args, expect_error=False): 73 def do_test_validity(self, args, expect_error=False):
59 """ helper for test_valid_doc[x] """ 74 """ helper for test_valid_doc[x] """
60 have_exception = False 75 have_exception = False
tests/oleid/test_basic.py 0 → 100644
  1 +"""
  2 +Test basic functionality of oleid
  3 +
  4 +Should work with python2 and python3!
  5 +"""
  6 +
  7 +import unittest
  8 +import os
  9 +from os.path import join, relpath, splitext
  10 +from oletools import oleid
  11 +
  12 +# Directory with test data, independent of current working directory
  13 +from tests.test_utils import DATA_BASE_DIR
  14 +
  15 +
  16 +class TestOleIDBasic(unittest.TestCase):
  17 + """Test basic functionality of OleID"""
  18 +
  19 + def test_all(self):
  20 + """Run all files in test-data through oleid and compare to known output"""
  21 + # this relies on order of indicators being constant, could relax that
  22 + # Also requires that files have the correct suffixes (no rtf in doc)
  23 + NON_OLE_SUFFIXES = ('.xml', '.csv', '.rtf', '')
  24 + NON_OLE_VALUES = (False, )
  25 + WORD = b'Microsoft Office Word'
  26 + PPT = b'Microsoft Office PowerPoint'
  27 + EXCEL = b'Microsoft Excel'
  28 + CRYPT = (True, False, 'unknown', True, False, False, False, False,
  29 + False, False, 0)
  30 + OLE_VALUES = {
  31 + 'oleobj/sample_with_lnk_file.doc': (True, True, WORD, False, True,
  32 + False, False, False, False,
  33 + True, 0),
  34 + 'oleobj/embedded-simple-2007.xlsb': (False,),
  35 + 'oleobj/embedded-simple-2007.docm': (False,),
  36 + 'oleobj/embedded-simple-2007.xltx': (False,),
  37 + 'oleobj/embedded-simple-2007.xlam': (False,),
  38 + 'oleobj/embedded-simple-2007.dotm': (False,),
  39 + 'oleobj/sample_with_lnk_file.ppt': (True, True, PPT, False, False,
  40 + False, False, True, False,
  41 + False, 0),
  42 + 'oleobj/embedded-simple-2007.xlsx': (False,),
  43 + 'oleobj/embedded-simple-2007.xlsm': (False,),
  44 + 'oleobj/embedded-simple-2007.ppsx': (False,),
  45 + 'oleobj/embedded-simple-2007.pps': (True, True, PPT, False, False,
  46 + False, False, True, False,
  47 + False, 0),
  48 + 'oleobj/embedded-simple-2007.xla': (True, True, EXCEL, False,
  49 + False, False, True, False,
  50 + False, False, 0),
  51 + 'oleobj/sample_with_calc_embedded.doc': (True, True, WORD, False,
  52 + True, False, False, False,
  53 + False, True, 0),
  54 + 'oleobj/embedded-unicode-2007.docx': (False,),
  55 + 'oleobj/embedded-unicode.doc': (True, True, WORD, False, True,
  56 + False, False, False, False, True,
  57 + 0),
  58 + 'oleobj/embedded-simple-2007.doc': (True, True, WORD, False, True,
  59 + False, False, False, False,
  60 + True, 0),
  61 + 'oleobj/embedded-simple-2007.xls': (True, True, EXCEL, False,
  62 + False, False, True, False,
  63 + False, False, 0),
  64 + 'oleobj/embedded-simple-2007.dot': (True, True, WORD, False, True,
  65 + False, False, False, False,
  66 + True, 0),
  67 + 'oleobj/sample_with_lnk_to_calc.doc': (True, True, WORD, False,
  68 + True, False, False, False,
  69 + False, True, 0),
  70 + 'oleobj/embedded-simple-2007.ppt': (True, True, PPT, False, False,
  71 + False, False, True, False,
  72 + False, 0),
  73 + 'oleobj/sample_with_lnk_file.pps': (True, True, PPT, False, False,
  74 + False, False, True, False,
  75 + False, 0),
  76 + 'oleobj/embedded-simple-2007.pptx': (False,),
  77 + 'oleobj/embedded-simple-2007.ppsm': (False,),
  78 + 'oleobj/embedded-simple-2007.dotx': (False,),
  79 + 'oleobj/embedded-simple-2007.pptm': (False,),
  80 + 'oleobj/embedded-simple-2007.xlt': (True, True, EXCEL, False,
  81 + False, False, True, False,
  82 + False, False, 0),
  83 + 'oleobj/embedded-simple-2007.docx': (False,),
  84 + 'oleobj/embedded-simple-2007.potx': (False,),
  85 + 'oleobj/embedded-simple-2007.pot': (True, True, PPT, False, False,
  86 + False, False, True, False,
  87 + False, 0),
  88 + 'oleobj/embedded-simple-2007.xltm': (False,),
  89 + 'oleobj/embedded-simple-2007.potm': (False,),
  90 + 'encrypted/encrypted.xlsx': CRYPT,
  91 + 'encrypted/encrypted.docm': CRYPT,
  92 + 'encrypted/encrypted.docx': CRYPT,
  93 + 'encrypted/encrypted.pptm': CRYPT,
  94 + 'encrypted/encrypted.xlsb': CRYPT,
  95 + 'encrypted/encrypted.xls': (True, True, EXCEL, True, False, False,
  96 + True, False, False, False, 0),
  97 + 'encrypted/encrypted.ppt': (True, False, 'unknown', True, False,
  98 + False, False, True, False, False, 0),
  99 + 'encrypted/encrypted.pptx': CRYPT,
  100 + 'encrypted/encrypted.xlsm': CRYPT,
  101 + 'encrypted/encrypted.doc': (True, True, WORD, True, True, False,
  102 + False, False, False, False, 0),
  103 + 'msodde/harmless-clean.docm': (False,),
  104 + 'msodde/dde-in-csv.csv': (False,),
  105 + 'msodde/dde-test-from-office2013-utf_16le-korean.doc':
  106 + (True, True, WORD, False, True, False, False, False, False,
  107 + False, 0),
  108 + 'msodde/harmless-clean.doc': (True, True, WORD, False, True, False,
  109 + False, False, False, False, 0),
  110 + 'msodde/dde-test.docm': (False,),
  111 + 'msodde/dde-test.xlsb': (False,),
  112 + 'msodde/dde-test.xlsm': (False,),
  113 + 'msodde/dde-test.docx': (False,),
  114 + 'msodde/dde-test.xlsx': (False,),
  115 + 'msodde/dde-test-from-office2003.doc': (True, True, WORD, False,
  116 + True, False, False, False,
  117 + False, False, 0),
  118 + 'msodde/dde-test-from-office2016.doc': (True, True, WORD, False,
  119 + True, False, False, False,
  120 + False, False, 0),
  121 + 'msodde/harmless-clean.docx': (False,),
  122 + 'oleform/oleform-PR314.docm': (False,),
  123 + 'basic/encrypted.docx': CRYPT,
  124 + }
  125 +
  126 + indicator_names = []
  127 + for base_dir, _, files in os.walk(DATA_BASE_DIR):
  128 + for filename in files:
  129 + full_path = join(base_dir, filename)
  130 + name = relpath(full_path, DATA_BASE_DIR)
  131 + values = tuple(indicator.value for indicator in
  132 + oleid.OleID(full_path).check())
  133 + if len(indicator_names) < 2: # not initialized with ole yet
  134 + indicator_names = tuple(indicator.name for indicator in
  135 + oleid.OleID(full_path).check())
  136 + suffix = splitext(filename)[1]
  137 + if suffix in NON_OLE_SUFFIXES:
  138 + self.assertEqual(values, NON_OLE_VALUES,
  139 + msg='For non-ole file {} expected {}, '
  140 + 'not {}'.format(name, NON_OLE_VALUES,
  141 + values))
  142 + continue
  143 + try:
  144 + self.assertEqual(values, OLE_VALUES[name],
  145 + msg='Wrong detail values for {}:\n'
  146 + ' Names {}\n Found {}\n Expect {}'
  147 + .format(name, indicator_names, values,
  148 + OLE_VALUES[name]))
  149 + except KeyError:
  150 + print('Should add oleid output for {} to {} ({})'
  151 + .format(name, __name__, values[3:]))
  152 +
  153 +# just in case somebody calls this file as a script
  154 +if __name__ == '__main__':
  155 + unittest.main()
tests/olevba/__init__.py 0 → 100644
tests/olevba/test_basic.py 0 → 100644
  1 +"""
  2 +Test basic functionality of olevba[3]
  3 +"""
  4 +
  5 +import unittest
  6 +import sys
  7 +if sys.version_info.major <= 2:
  8 + from oletools import olevba
  9 +else:
  10 + from oletools import olevba3 as olevba
  11 +import os
  12 +from os.path import join
  13 +
  14 +# Directory with test data, independent of current working directory
  15 +from tests.test_utils import DATA_BASE_DIR
  16 +
  17 +
  18 +class TestOlevbaBasic(unittest.TestCase):
  19 + """Tests olevba basic functionality"""
  20 +
  21 + def test_crypt_return(self):
  22 + """
  23 + Tests that encrypted files give a certain return code.
  24 +
  25 + Currently, only the encryption applied by Office 2010 (CryptoApi RC4
  26 + Encryption) is tested.
  27 + """
  28 + CRYPT_DIR = join(DATA_BASE_DIR, 'encrypted')
  29 + CRYPT_RETURN_CODE = 9
  30 + ADD_ARGS = [], ['-d', ], ['-a', ], ['-j', ], ['-t', ]
  31 + for filename in os.listdir(CRYPT_DIR):
  32 + full_name = join(CRYPT_DIR, filename)
  33 + for args in ADD_ARGS:
  34 + try:
  35 + ret_code = olevba.main(args + [full_name, ])
  36 + except SystemExit as se:
  37 + ret_code = se.code or 0 # se.code can be None
  38 + self.assertEqual(ret_code, CRYPT_RETURN_CODE,
  39 + msg='Wrong return code {} for args {}'
  40 + .format(ret_code, args + [filename, ]))
  41 +
  42 +
  43 +# just in case somebody calls this file as a script
  44 +if __name__ == '__main__':
  45 + unittest.main()
tests/test-data/encrypted/encrypted.doc 0 → 100644
No preview for this file type
tests/test-data/encrypted/encrypted.docm 0 → 100644
No preview for this file type
tests/test-data/encrypted/encrypted.docx 0 → 100644
No preview for this file type
tests/test-data/encrypted/encrypted.ppt 0 → 100644
No preview for this file type
tests/test-data/encrypted/encrypted.pptm 0 → 100644
No preview for this file type
tests/test-data/encrypted/encrypted.pptx 0 → 100644
No preview for this file type
tests/test-data/encrypted/encrypted.xls 0 → 100644
No preview for this file type
tests/test-data/encrypted/encrypted.xlsb 0 → 100644
No preview for this file type
tests/test-data/encrypted/encrypted.xlsm 0 → 100644
No preview for this file type
tests/test-data/encrypted/encrypted.xlsx 0 → 100644
No preview for this file type