Commit e8b6dd46b5b6221d8ff6725862f3b14fa15f2128

Authored by Philippe Lagadec
Committed by GitHub
2 parents d1f2a554 72684467

Merge pull request #362 from christian-intra2net/encrypt-detect-and-raise

Encrypt detect and raise
oletools/common/errors.py 0 → 100644
  1 +"""
  2 +Errors used in several tools to avoid duplication
  3 +
  4 +.. codeauthor:: Intra2net AG <info@intra2net.com>
  5 +"""
  6 +
  7 +class FileIsEncryptedError(ValueError):
  8 + """Exception raised if a file is encrypted and the tool cannot handle it."""
  9 + # see also: same class in olevba[3] and record_base
  10 + def __init__(self, filename=None):
  11 + super(FileIsEncryptedError, self).__init__(
  12 + 'Office file {}is encrypted, not yet supported'
  13 + .format('' if filename is None else filename + ' '))
... ...
oletools/msodde.py
... ... @@ -11,6 +11,7 @@ Supported formats:
11 11 - RTF
12 12 - CSV (exported from / imported into Excel)
13 13 - XML (exported from Word 2003, Word 2007+, Excel 2003, Excel 2007+?)
  14 +- raises an error if run with files encrypted using MS Crypto API RC4
14 15  
15 16 Author: Philippe Lagadec - http://www.decalage.info
16 17 License: BSD, see source code or documentation
... ... @@ -61,7 +62,9 @@ import olefile
61 62 from oletools import ooxml
62 63 from oletools import xls_parser
63 64 from oletools import rtfobj
  65 +from oletools import oleid
64 66 from oletools.common.log_helper import log_helper
  67 +from oletools.common.errors import FileIsEncryptedError
65 68  
66 69 # -----------------------------------------------------------------------------
67 70 # CHANGELOG:
... ... @@ -84,6 +87,7 @@ from oletools.common.log_helper import log_helper
84 87 # 2018-01-10 CH: - add single-xml files (Word 2003/2007+ / Excel 2003)
85 88 # 2018-03-21 CH: - added detection for various CSV formulas (issue #259)
86 89 # 2018-09-11 v0.54 PL: - olefile is now a dependency
  90 +# 2018-10-25 CH: - detect encryption and raise error if detected
87 91  
88 92 __version__ = '0.54dev1'
89 93  
... ... @@ -438,17 +442,18 @@ def process_doc_stream(stream):
438 442 return result_parts
439 443  
440 444  
441   -def process_doc(filepath):
  445 +def process_doc(ole):
442 446 """
443 447 find dde links in word ole (.doc/.dot) file
444 448  
  449 + Expects an already opened OleFileIO object. The caller is responsible for
  450 + skipping ppt files (they cannot contain DDE-links to my knowledge)
  451 +
445 452 like process_xml, returns a concatenated unicode string of dde links or
446 453 empty if none were found. dde-links will still begin with the dde[auto] key
447 454 word (possibly after some whitespace)
448 455 """
449 456 logger.debug('process_doc')
450   - ole = olefile.OleFileIO(filepath, path_encoding=None)
451   -
452 457 links = []
453 458 for sid, direntry in enumerate(ole.direntries):
454 459 is_orphan = direntry is None
... ... @@ -703,8 +708,8 @@ def process_xlsx(filepath):
703 708 log_func = logger.debug
704 709 else: # default
705 710 log_func = logger.info
706   - log_func('Failed to parse {0} of content type {1}'
707   - .format(subfile, content_type))
  711 + log_func('Failed to parse {0} of content type {1} ("{2}")'
  712 + .format(subfile, content_type, str(exc)))
708 713 # in any case: continue with next
709 714  
710 715 return u'\n'.join(dde_links)
... ... @@ -886,9 +891,20 @@ def process_file(filepath, field_filter_mode=None):
886 891 if xls_parser.is_xls(filepath):
887 892 logger.debug('Process file as excel 2003 (xls)')
888 893 return process_xls(filepath)
  894 +
  895 + # encrypted files also look like ole, even if office 2007+ (xml-based)
  896 + # so check for encryption, first
  897 + ole = olefile.OleFileIO(filepath, path_encoding=None)
  898 + oid = oleid.OleID(ole)
  899 + if oid.check_encrypted().value:
  900 + log.debug('is encrypted - raise error')
  901 + raise FileIsEncryptedError(filepath)
  902 + elif oid.check_powerpoint().value:
  903 + log.debug('is ppt - cannot have DDE')
  904 + return u''
889 905 else:
890 906 logger.debug('Process file as word 2003 (doc)')
891   - return process_doc(filepath)
  907 + return process_doc(ole)
892 908  
893 909 with open(filepath, 'rb') as file_handle:
894 910 if file_handle.read(4) == RTF_START:
... ...
oletools/oleid.py
... ... @@ -6,9 +6,8 @@ oleid is a script to analyze OLE files such as MS Office documents (e.g. Word,
6 6 Excel), to detect specific characteristics that could potentially indicate that
7 7 the file is suspicious or malicious, in terms of security (e.g. malware).
8 8 For example it can detect VBA macros, embedded Flash objects, fragmentation.
9   -The results can be displayed or returned as XML for further processing.
10   -
11   -Usage: oleid.py <file>
  9 +The results are displayed as an ASCII table (but could be returned or printed
  10 +in other formats like CSV, XML or JSON in the future).
12 11  
13 12 oleid project website: http://www.decalage.info/python/oleid
14 13  
... ... @@ -21,8 +20,8 @@ http://www.decalage.info/python/oletools
21 20 # oleid is copyright (c) 2012-2018, Philippe Lagadec (http://www.decalage.info)
22 21 # All rights reserved.
23 22 #
24   -# Redistribution and use in source and binary forms, with or without modification,
25   -# are permitted provided that the following conditions are met:
  23 +# Redistribution and use in source and binary forms, with or without
  24 +# modification, are permitted provided that the following conditions are met:
26 25 #
27 26 # * Redistributions of source code must retain the above copyright notice, this
28 27 # list of conditions and the following disclaimer.
... ... @@ -30,16 +29,17 @@ http://www.decalage.info/python/oletools
30 29 # this list of conditions and the following disclaimer in the documentation
31 30 # and/or other materials provided with the distribution.
32 31 #
33   -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
34   -# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
35   -# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
36   -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
37   -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
38   -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
39   -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
40   -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
41   -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
42   -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  32 +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  33 +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  34 +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  35 +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
  36 +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  37 +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  38 +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  39 +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  40 +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  41 +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  42 +# POSSIBILITY OF SUCH DAMAGE.
43 43  
44 44 # To improve Python 2+3 compatibility:
45 45 from __future__ import print_function
... ... @@ -56,6 +56,8 @@ from __future__ import print_function
56 56 # 2017-04-26 PL: - fixed absolute imports (issue #141)
57 57 # 2017-09-01 SA: - detect OpenXML encryption
58 58 # 2018-09-11 v0.54 PL: - olefile is now a dependency
  59 +# 2018-10-19 CH: - accept olefile as well as filename, return Indicators,
  60 +# improve encryption detection for ppt
59 61  
60 62 __version__ = '0.54dev1'
61 63  
... ... @@ -78,28 +80,27 @@ __version__ = '0.54dev1'
78 80  
79 81 #=== IMPORTS =================================================================
80 82  
81   -import optparse, sys, os, re, zlib, struct
  83 +import argparse, sys, re, zlib, struct
  84 +from os.path import dirname, abspath
82 85  
83   -# IMPORTANT: it should be possible to run oletools directly as scripts
84   -# in any directory without installing them with pip or setup.py.
85   -# In that case, relative imports are NOT usable.
86   -# And to enable Python 2+3 compatibility, we need to use absolute imports,
87   -# so we add the oletools parent folder to sys.path (absolute+normalized path):
88   -_thismodule_dir = os.path.normpath(os.path.abspath(os.path.dirname(__file__)))
89   -# print('_thismodule_dir = %r' % _thismodule_dir)
90   -_parent_dir = os.path.normpath(os.path.join(_thismodule_dir, '..'))
91   -# print('_parent_dir = %r' % _thirdparty_dir)
92   -if not _parent_dir in sys.path:
93   - sys.path.insert(0, _parent_dir)
  86 +# little hack to allow absolute imports even if oletools is not installed
  87 +# (required to run oletools directly as scripts in any directory).
  88 +try:
  89 + from oletools.thirdparty import prettytable
  90 +except ImportError:
  91 + PARENT_DIR = dirname(dirname(abspath(__file__)))
  92 + if PARENT_DIR not in sys.path:
  93 + sys.path.insert(0, PARENT_DIR)
  94 + del PARENT_DIR
  95 + from oletools.thirdparty import prettytable
94 96  
95 97 import olefile
96   -from oletools.thirdparty.prettytable import prettytable
97 98  
98 99  
99 100  
100 101 #=== FUNCTIONS ===============================================================
101 102  
102   -def detect_flash (data):
  103 +def detect_flash(data):
103 104 """
104 105 Detect Flash objects (SWF files) within a binary string of data
105 106 return a list of (start_index, length, compressed) tuples, or [] if nothing
... ... @@ -141,7 +142,7 @@ def detect_flash (data):
141 142 compressed_data = swf[8:]
142 143 try:
143 144 zlib.decompress(compressed_data)
144   - except:
  145 + except Exception:
145 146 continue
146 147 # else we don't check anything at this stage, we only assume it is a
147 148 # valid SWF. So there might be false positives for uncompressed SWF.
... ... @@ -152,9 +153,15 @@ def detect_flash (data):
152 153  
153 154 #=== CLASSES =================================================================
154 155  
155   -class Indicator (object):
  156 +class Indicator(object):
  157 + """
  158 + Piece of information of an :py:class:`OleID` object.
  159 +
  160 + Contains an ID, value, type, name and description. No other functionality.
  161 + """
156 162  
157   - def __init__(self, _id, value=None, _type=bool, name=None, description=None):
  163 + def __init__(self, _id, value=None, _type=bool, name=None,
  164 + description=None):
158 165 self.id = _id
159 166 self.value = value
160 167 self.type = _type
... ... @@ -164,21 +171,55 @@ class Indicator (object):
164 171 self.description = description
165 172  
166 173  
167   -class OleID:
  174 +class OleID(object):
  175 + """
  176 + Summary of information about an OLE file
168 177  
169   - def __init__(self, filename):
170   - self.filename = filename
  178 + Call :py:meth:`OleID.check` to gather all info on a given file or run one
  179 + of the `check_` functions to just get a specific piece of info.
  180 + """
  181 +
  182 + def __init__(self, input_file):
  183 + """
  184 + Create an OleID object
  185 +
  186 + This does not run any checks yet nor open the file.
  187 +
  188 + Can either give just a filename (as str), so OleID will check whether
  189 + that is a valid OLE file and create a :py:class:`olefile.OleFileIO`
  190 + object for it. Or you can give an already opened
  191 + :py:class:`olefile.OleFileIO` as argument to avoid re-opening (e.g. if
  192 + called from other oletools).
  193 +
  194 + If filename is given, only :py:meth:`OleID.check` opens the file. Other
  195 + functions will return None
  196 + """
  197 + if isinstance(input_file, olefile.OleFileIO):
  198 + self.ole = input_file
  199 + self.filename = None
  200 + else:
  201 + self.filename = input_file
  202 + self.ole = None
171 203 self.indicators = []
  204 + self.suminfo_data = None
172 205  
173 206 def check(self):
  207 + """
  208 + Open file and run all checks on it.
  209 +
  210 + :returns: list of all :py:class:`Indicator`s created
  211 + """
174 212 # check if it is actually an OLE file:
175 213 oleformat = Indicator('ole_format', True, name='OLE format')
176 214 self.indicators.append(oleformat)
177   - if not olefile.isOleFile(self.filename):
  215 + if self.ole:
  216 + oleformat.value = True
  217 + elif not olefile.isOleFile(self.filename):
178 218 oleformat.value = False
179 219 return self.indicators
180   - # parse file:
181   - self.ole = olefile.OleFileIO(self.filename)
  220 + else:
  221 + # parse file:
  222 + self.ole = olefile.OleFileIO(self.filename)
182 223 # checks:
183 224 self.check_properties()
184 225 self.check_encrypted()
... ... @@ -186,143 +227,274 @@ class OleID:
186 227 self.check_excel()
187 228 self.check_powerpoint()
188 229 self.check_visio()
189   - self.check_ObjectPool()
  230 + self.check_object_pool()
190 231 self.check_flash()
191 232 self.ole.close()
192 233 return self.indicators
193 234  
194   - def check_properties (self):
195   - suminfo = Indicator('has_suminfo', False, name='Has SummaryInformation stream')
  235 + def check_properties(self):
  236 + """
  237 + Read summary information required for other check_* functions
  238 +
  239 + :returns: 2 :py:class:`Indicator`s (for presence of summary info and
  240 + application name) or (None, None) if file was not opened
  241 + """
  242 + suminfo = Indicator('has_suminfo', False,
  243 + name='Has SummaryInformation stream')
196 244 self.indicators.append(suminfo)
197   - appname = Indicator('appname', 'unknown', _type=str, name='Application name')
  245 + appname = Indicator('appname', 'unknown', _type=str,
  246 + name='Application name')
198 247 self.indicators.append(appname)
199   - self.suminfo = {}
200   - # check stream SummaryInformation
  248 + if not self.ole:
  249 + return None, None
  250 + self.suminfo_data = {}
  251 + # check stream SummaryInformation (not present e.g. in encrypted ppt)
201 252 if self.ole.exists("\x05SummaryInformation"):
202 253 suminfo.value = True
203   - self.suminfo = self.ole.getproperties("\x05SummaryInformation")
  254 + self.suminfo_data = self.ole.getproperties("\x05SummaryInformation")
204 255 # check application name:
205   - appname.value = self.suminfo.get(0x12, 'unknown')
206   -
207   - def check_encrypted (self):
  256 + appname.value = self.suminfo_data.get(0x12, 'unknown')
  257 + return suminfo, appname
  258 +
  259 + def get_indicator(self, indicator_id):
  260 + """Helper function: returns an indicator if present (or None)"""
  261 + result = [indicator for indicator in self.indicators
  262 + if indicator.id == indicator_id]
  263 + if result:
  264 + return result[0]
  265 + else:
  266 + return None
  267 +
  268 + def check_encrypted(self):
  269 + """
  270 + Check whether this file is encrypted.
  271 +
  272 + Might call check_properties.
  273 +
  274 + :returns: :py:class:`Indicator` for encryption or None if file was not
  275 + opened
  276 + """
208 277 # we keep the pointer to the indicator, can be modified by other checks:
209   - self.encrypted = Indicator('encrypted', False, name='Encrypted')
210   - self.indicators.append(self.encrypted)
  278 + encrypted = Indicator('encrypted', False, name='Encrypted')
  279 + self.indicators.append(encrypted)
  280 + if not self.ole:
  281 + return None
211 282 # check if bit 1 of security field = 1:
212 283 # (this field may be missing for Powerpoint2000, for example)
213   - if 0x13 in self.suminfo:
214   - if self.suminfo[0x13] & 1:
215   - self.encrypted.value = True
  284 + if self.suminfo_data is None:
  285 + self.check_properties()
  286 + if 0x13 in self.suminfo_data:
  287 + if self.suminfo_data[0x13] & 1:
  288 + encrypted.value = True
216 289 # check if this is an OpenXML encrypted file
217 290 elif self.ole.exists('EncryptionInfo'):
218   - self.encrypted.value = True
219   -
220   - def check_word (self):
221   - word = Indicator('word', False, name='Word Document',
222   - description='Contains a WordDocument stream, very likely to be a Microsoft Word Document.')
  291 + encrypted.value = True
  292 + # or an encrypted ppt file
  293 + if self.ole.exists('EncryptedSummary') and \
  294 + not self.ole.exists('SummaryInformation'):
  295 + encrypted.value = True
  296 + return encrypted
  297 +
  298 + def check_word(self):
  299 + """
  300 + Check whether this file is a word document
  301 +
  302 + If this finds evidence of encryption, will correct/add encryption
  303 + indicator.
  304 +
  305 + :returns: 2 :py:class:`Indicator`s (for word and vba_macro) or
  306 + (None, None) if file was not opened
  307 + """
  308 + word = Indicator(
  309 + 'word', False, name='Word Document',
  310 + description='Contains a WordDocument stream, very likely to be a '
  311 + 'Microsoft Word Document.')
223 312 self.indicators.append(word)
224   - self.macros = Indicator('vba_macros', False, name='VBA Macros')
225   - self.indicators.append(self.macros)
  313 + macros = Indicator('vba_macros', False, name='VBA Macros')
  314 + self.indicators.append(macros)
  315 + if not self.ole:
  316 + return None, None
226 317 if self.ole.exists('WordDocument'):
227 318 word.value = True
228 319 # check for Word-specific encryption flag:
229   - s = self.ole.openstream(["WordDocument"])
230   - # pass header 10 bytes
231   - s.read(10)
232   - # read flag structure:
233   - temp16 = struct.unpack("H", s.read(2))[0]
234   - fEncrypted = (temp16 & 0x0100) >> 8
235   - if fEncrypted:
236   - self.encrypted.value = True
237   - s.close()
  320 + stream = None
  321 + try:
  322 + stream = self.ole.openstream(["WordDocument"])
  323 + # pass header 10 bytes
  324 + stream.read(10)
  325 + # read flag structure:
  326 + temp16 = struct.unpack("H", stream.read(2))[0]
  327 + f_encrypted = (temp16 & 0x0100) >> 8
  328 + if f_encrypted:
  329 + # correct encrypted indicator if present or add one
  330 + encrypt_ind = self.get_indicator('encrypted')
  331 + if encrypt_ind:
  332 + encrypt_ind.value = True
  333 + else:
  334 + self.indicators.append('encrypted', True, name='Encrypted')
  335 + except Exception:
  336 + raise
  337 + finally:
  338 + if stream is not None:
  339 + stream.close()
238 340 # check for VBA macros:
239 341 if self.ole.exists('Macros'):
240   - self.macros.value = True
  342 + macros.value = True
  343 + return word, macros
  344 +
  345 + def check_excel(self):
  346 + """
  347 + Check whether this file is an excel workbook.
  348 +
  349 + If this finds macros, will add/correct macro indicator.
241 350  
242   - def check_excel (self):
243   - excel = Indicator('excel', False, name='Excel Workbook',
244   - description='Contains a Workbook or Book stream, very likely to be a Microsoft Excel Workbook.')
  351 + see also: :py:func:`xls_parser.is_xls`
  352 +
  353 + :returns: :py:class:`Indicator` for excel or None if file was not
  354 + opened
  355 + """
  356 + excel = Indicator(
  357 + 'excel', False, name='Excel Workbook',
  358 + description='Contains a Workbook or Book stream, very likely to be '
  359 + 'a Microsoft Excel Workbook.')
245 360 self.indicators.append(excel)
  361 + if not self.ole:
  362 + return None
246 363 #self.macros = Indicator('vba_macros', False, name='VBA Macros')
247 364 #self.indicators.append(self.macros)
248 365 if self.ole.exists('Workbook') or self.ole.exists('Book'):
249 366 excel.value = True
250 367 # check for VBA macros:
251 368 if self.ole.exists('_VBA_PROJECT_CUR'):
252   - self.macros.value = True
253   -
254   - def check_powerpoint (self):
255   - ppt = Indicator('ppt', False, name='PowerPoint Presentation',
256   - description='Contains a PowerPoint Document stream, very likely to be a Microsoft PowerPoint Presentation.')
  369 + # correct macro indicator if present or add one
  370 + macro_ind = self.get_indicator('vba_macros')
  371 + if macro_ind:
  372 + macro_ind.value = True
  373 + else:
  374 + self.indicators.append('vba_macros', True,
  375 + name='VBA Macros')
  376 + return excel
  377 +
  378 + def check_powerpoint(self):
  379 + """
  380 + Check whether this file is a powerpoint presentation
  381 +
  382 + see also: :py:func:`ppt_record_parser.is_ppt`
  383 +
  384 + :returns: :py:class:`Indicator` for whether this is a powerpoint
  385 + presentation or not or None if file was not opened
  386 + """
  387 + ppt = Indicator(
  388 + 'ppt', False, name='PowerPoint Presentation',
  389 + description='Contains a PowerPoint Document stream, very likely to '
  390 + 'be a Microsoft PowerPoint Presentation.')
257 391 self.indicators.append(ppt)
  392 + if not self.ole:
  393 + return None
258 394 if self.ole.exists('PowerPoint Document'):
259 395 ppt.value = True
260   -
261   - def check_visio (self):
262   - visio = Indicator('visio', False, name='Visio Drawing',
263   - description='Contains a VisioDocument stream, very likely to be a Microsoft Visio Drawing.')
  396 + return ppt
  397 +
  398 + def check_visio(self):
  399 + """Check whether this file is a visio drawing"""
  400 + visio = Indicator(
  401 + 'visio', False, name='Visio Drawing',
  402 + description='Contains a VisioDocument stream, very likely to be a '
  403 + 'Microsoft Visio Drawing.')
264 404 self.indicators.append(visio)
  405 + if not self.ole:
  406 + return None
265 407 if self.ole.exists('VisioDocument'):
266 408 visio.value = True
  409 + return visio
  410 +
  411 + def check_object_pool(self):
  412 + """
  413 + Check whether this file contains an ObjectPool stream.
  414 +
  415 + Such a stream would be a strong indicator for embedded objects or files.
267 416  
268   - def check_ObjectPool (self):
269   - objpool = Indicator('ObjectPool', False, name='ObjectPool',
270   - description='Contains an ObjectPool stream, very likely to contain embedded OLE objects or files.')
  417 + :returns: :py:class:`Indicator` for ObjectPool stream or None if file
  418 + was not opened
  419 + """
  420 + objpool = Indicator(
  421 + 'ObjectPool', False, name='ObjectPool',
  422 + description='Contains an ObjectPool stream, very likely to contain '
  423 + 'embedded OLE objects or files.')
271 424 self.indicators.append(objpool)
  425 + if not self.ole:
  426 + return None
272 427 if self.ole.exists('ObjectPool'):
273 428 objpool.value = True
274   -
275   -
276   - def check_flash (self):
277   - flash = Indicator('flash', 0, _type=int, name='Flash objects',
278   - description='Number of embedded Flash objects (SWF files) detected in OLE streams. Not 100% accurate, there may be false positives.')
  429 + return objpool
  430 +
  431 + def check_flash(self):
  432 + """
  433 + Check whether this file contains flash objects
  434 +
  435 + :returns: :py:class:`Indicator` for count of flash objects or None if
  436 + file was not opened
  437 + """
  438 + flash = Indicator(
  439 + 'flash', 0, _type=int, name='Flash objects',
  440 + description='Number of embedded Flash objects (SWF files) detected '
  441 + 'in OLE streams. Not 100% accurate, there may be false '
  442 + 'positives.')
279 443 self.indicators.append(flash)
  444 + if not self.ole:
  445 + return None
280 446 for stream in self.ole.listdir():
281 447 data = self.ole.openstream(stream).read()
282 448 found = detect_flash(data)
283 449 # just add to the count of Flash objects:
284 450 flash.value += len(found)
285 451 #print stream, found
  452 + return flash
286 453  
287 454  
288 455 #=== MAIN =================================================================
289 456  
290 457 def main():
  458 + """Called when running this file as script. Shows all info on input file."""
291 459 # print banner with version
292   - print ('oleid %s - http://decalage.info/oletools' % __version__)
293   - print ('THIS IS WORK IN PROGRESS - Check updates regularly!')
294   - print ('Please report any issue at https://github.com/decalage2/oletools/issues')
295   - print ('')
  460 + print('oleid %s - http://decalage.info/oletools' % __version__)
  461 + print('THIS IS WORK IN PROGRESS - Check updates regularly!')
  462 + print('Please report any issue at '
  463 + 'https://github.com/decalage2/oletools/issues')
  464 + print('')
296 465  
297   - usage = 'usage: %prog [options] <file>'
298   - parser = optparse.OptionParser(usage=__doc__ + '\n' + usage)
299   -## parser.add_option('-o', '--ole', action='store_true', dest='ole', help='Parse an OLE file (e.g. Word, Excel) to look for SWF in each stream')
  466 + parser = argparse.ArgumentParser(description=__doc__)
  467 + parser.add_argument('input', type=str, nargs='*', metavar='FILE',
  468 + help='Name of files to process')
  469 + # parser.add_argument('-o', '--ole', action='store_true', dest='ole',
  470 + # help='Parse an OLE file (e.g. Word, Excel) to look for '
  471 + # 'SWF in each stream')
300 472  
301   - (options, args) = parser.parse_args()
  473 + args = parser.parse_args()
302 474  
303 475 # Print help if no arguments are passed
304   - if len(args) == 0:
  476 + if len(args.input) == 0:
305 477 parser.print_help()
306 478 return
307 479  
308   - for filename in args:
  480 + for filename in args.input:
309 481 print('Filename:', filename)
310 482 oleid = OleID(filename)
311 483 indicators = oleid.check()
312 484  
313 485 #TODO: add description
314 486 #TODO: highlight suspicious indicators
315   - t = prettytable.PrettyTable(['Indicator', 'Value'])
316   - t.align = 'l'
317   - t.max_width = 39
318   - #t.border = False
  487 + table = prettytable.PrettyTable(['Indicator', 'Value'])
  488 + table.align = 'l'
  489 + table.max_width = 39
  490 + table.border = False
319 491  
320 492 for indicator in indicators:
321 493 #print '%s: %s' % (indicator.name, indicator.value)
322   - t.add_row((indicator.name, indicator.value))
  494 + table.add_row((indicator.name, indicator.value))
323 495  
324   - print(t)
325   - print ('')
  496 + print(table)
  497 + print('')
326 498  
327 499 if __name__ == '__main__':
328 500 main()
... ...
oletools/olevba.py
... ... @@ -14,6 +14,7 @@ Supported formats:
14 14 - Word 2003 XML (.xml)
15 15 - Word/Excel Single File Web Page / MHTML (.mht)
16 16 - Publisher (.pub)
  17 +- raises an error if run with files encrypted using MS Crypto API RC4
17 18  
18 19 Author: Philippe Lagadec - http://www.decalage.info
19 20 License: BSD, see source code or documentation
... ... @@ -208,6 +209,7 @@ from __future__ import print_function
208 209 # (issue #283)
209 210 # 2018-09-11 v0.54 PL: - olefile is now a dependency
210 211 # 2018-10-08 PL: - replace backspace before printing to console (issue #358)
  212 +# 2018-10-25 CH: - detect encryption and raise error if detected
211 213  
212 214 __version__ = '0.54dev2'
213 215  
... ... @@ -309,6 +311,8 @@ from pyparsing import \
309 311 from oletools import ppt_parser
310 312 from oletools import oleform
311 313 from oletools import rtfobj
  314 +from oletools import oleid
  315 +from oletools.common.errors import FileIsEncryptedError
312 316  
313 317  
314 318 # monkeypatch email to fix issue #32:
... ... @@ -472,6 +476,7 @@ RETURN_OPEN_ERROR = 5
472 476 RETURN_PARSE_ERROR = 6
473 477 RETURN_SEVERAL_ERRS = 7
474 478 RETURN_UNEXPECTED = 8
  479 +RETURN_ENCRYPTED = 9
475 480  
476 481 # MAC codepages (from http://stackoverflow.com/questions/1592925/decoding-mac-os-text-in-python)
477 482 MAC_CODEPAGES = {
... ... @@ -2367,6 +2372,12 @@ class VBA_Parser(object):
2367 2372 # This looks like an OLE file
2368 2373 self.open_ole(_file)
2369 2374  
  2375 + # check whether file is encrypted (need to do this before try ppt)
  2376 + log.debug('Check encryption of ole file')
  2377 + crypt_indicator = oleid.OleID(self.ole_file).check_encrypted()
  2378 + if crypt_indicator.value:
  2379 + raise FileIsEncryptedError(filename)
  2380 +
2370 2381 # if this worked, try whether it is a ppt file (special ole file)
2371 2382 self.open_ppt()
2372 2383 if self.type is None and is_zipfile(_file):
... ... @@ -3634,6 +3645,16 @@ def main(cmd_line_args=None):
3634 3645 % (filename, exc.orig_exc))
3635 3646 return_code = RETURN_PARSE_ERROR if return_code == 0 \
3636 3647 else RETURN_SEVERAL_ERRS
  3648 + except FileIsEncryptedError as exc:
  3649 + if options.output_mode in ('triage', 'unspecified'):
  3650 + print('%-12s %s - File is encrypted' % ('!ERROR', filename))
  3651 + elif options.output_mode == 'json':
  3652 + print_json(file=filename, type='error',
  3653 + error=type(exc).__name__, message=str(exc))
  3654 + else:
  3655 + log.exception('File %s is encrypted!' % (filename))
  3656 + return_code = RETURN_ENCRYPTED if return_code == 0 \
  3657 + else RETURN_SEVERAL_ERRS
3637 3658 # Here we do not close the vba_parser, because process_file may need it below.
3638 3659  
3639 3660 if options.output_mode == 'triage':
... ...
oletools/olevba3.py
... ... @@ -16,6 +16,7 @@ Supported formats:
16 16 - Word 2003 XML (.xml)
17 17 - Word/Excel Single File Web Page / MHTML (.mht)
18 18 - Publisher (.pub)
  19 +- raises an error if run with files encrypted using MS Crypto API RC4
19 20  
20 21 Author: Philippe Lagadec - http://www.decalage.info
21 22 License: BSD, see source code or documentation
... ... @@ -207,6 +208,7 @@ from __future__ import print_function
207 208 # 2018-06-11 v0.53.1 MHW: - fixed #320: chr instead of unichr on python 3
208 209 # 2018-06-12 MHW: - fixed #322: import reduce from functools
209 210 # 2018-09-11 v0.54 PL: - olefile is now a dependency
  211 +# 2018-10-25 CH: - detect encryption and raise error if detected
210 212  
211 213 __version__ = '0.54dev1'
212 214  
... ... @@ -247,7 +249,6 @@ import os
247 249 import logging
248 250 import struct
249 251 from _io import StringIO,BytesIO
250   -from oletools import rtfobj
251 252 import math
252 253 import zipfile
253 254 import re
... ... @@ -298,6 +299,9 @@ from pyparsing import \
298 299 alphanums, alphas, hexnums,nums, opAssoc, srange, \
299 300 infixNotation, ParserElement
300 301 import oletools.ppt_parser as ppt_parser
  302 +from oletools import rtfobj
  303 +from oletools import oleid
  304 +from oletools.common.errors import FileIsEncryptedError
301 305  
302 306 # monkeypatch email to fix issue #32:
303 307 # allow header lines without ":"
... ... @@ -479,6 +483,7 @@ RETURN_OPEN_ERROR = 5
479 483 RETURN_PARSE_ERROR = 6
480 484 RETURN_SEVERAL_ERRS = 7
481 485 RETURN_UNEXPECTED = 8
  486 +RETURN_ENCRYPTED = 9
482 487  
483 488 # MAC codepages (from http://stackoverflow.com/questions/1592925/decoding-mac-os-text-in-python)
484 489 MAC_CODEPAGES = {
... ... @@ -2360,6 +2365,12 @@ class VBA_Parser(object):
2360 2365 # This looks like an OLE file
2361 2366 self.open_ole(_file)
2362 2367  
  2368 + # check whether file is encrypted (need to do this before try ppt)
  2369 + log.debug('Check encryption of ole file')
  2370 + crypt_indicator = oleid.OleID(self.ole_file).check_encrypted()
  2371 + if crypt_indicator.value:
  2372 + raise FileIsEncryptedError(filename)
  2373 +
2363 2374 # if this worked, try whether it is a ppt file (special ole file)
2364 2375 self.open_ppt()
2365 2376 if self.type is None and is_zipfile(_file):
... ... @@ -3594,6 +3605,18 @@ def main(cmd_line_args=None):
3594 3605 % (filename, exc.orig_exc))
3595 3606 return_code = RETURN_PARSE_ERROR if return_code == 0 \
3596 3607 else RETURN_SEVERAL_ERRS
  3608 + except FileIsEncryptedError as exc:
  3609 + if options.output_mode in ('triage', 'unspecified'):
  3610 + print('%-12s %s - File is encrypted' % ('!ERROR', filename))
  3611 + elif options.output_mode == 'json':
  3612 + print_json(file=filename, type='error',
  3613 + error=type(exc).__name__, message=str(exc))
  3614 + else:
  3615 + log.exception('File %s is encrypted!' % (filename))
  3616 + return_code = RETURN_ENCRYPTED if return_code == 0 \
  3617 + else RETURN_SEVERAL_ERRS
  3618 + # Here we do not close the vba_parser, because process_file may need it below.
  3619 +
3597 3620 finally:
3598 3621 if vba_parser is not None:
3599 3622 vba_parser.close()
... ...
oletools/ooxml.py
... ... @@ -9,6 +9,8 @@ See also: Notes on Microsoft's implementation of ECMA-376: [MS-OE376]
9 9  
10 10 TODO: may have to tell apart single xml types: office2003 looks much different
11 11 than 2006+ --> DOCTYPE_*_XML2003
  12 +TODO: check what is duplicate here with oleid, maybe merge some day?
  13 +TODO: "xml2003" == "flatopc"?
12 14  
13 15 .. codeauthor:: Intra2net AG <info@intra2net.com>
14 16 """
... ...
oletools/ppt_record_parser.py
... ... @@ -63,6 +63,7 @@ except ImportError:
63 63 sys.path.insert(0, PARENT_DIR)
64 64 del PARENT_DIR
65 65 from oletools import record_base
  66 +from oletools.common.errors import FileIsEncryptedError
66 67  
67 68  
68 69 # types of relevant records (there are much more than listed here)
... ... @@ -147,13 +148,17 @@ def is_ppt(filename):
147 148  
148 149 Param filename can be anything that OleFileIO constructor accepts: name of
149 150 file or file data or data stream.
  151 +
  152 + see also: oleid.OleID.check_powerpoint
150 153 """
151 154 have_current_user = False
152 155 have_user_edit = False
153 156 have_persist_dir = False
154 157 have_document_container = False
  158 + ppt_file = None
155 159 try:
156   - for stream in PptFile(filename).iter_streams():
  160 + ppt_file = PptFile(filename)
  161 + for stream in ppt_file.iter_streams():
157 162 if stream.name == 'Current User':
158 163 for record in stream.iter_records():
159 164 if isinstance(record, PptRecordCurrentUser):
... ... @@ -176,6 +181,11 @@ def is_ppt(filename):
176 181 return True
177 182 else: # ignore other streams/storages since they are optional
178 183 continue
  184 + except FileIsEncryptedError:
  185 + assert ppt_file is not None, \
  186 + 'Encryption error should not be raised from just opening OLE file.'
  187 + # just rely on stream names, copied from oleid
  188 + return ppt_file.exists('PowerPoint Document')
179 189 except Exception:
180 190 pass
181 191 return False
... ...
oletools/record_base.py
... ... @@ -44,6 +44,7 @@ __version__ = '0.54dev1'
44 44 # TODO:
45 45 # - read DocumentSummaryInformation first to get more info about streams
46 46 # (maybe content type or so; identify streams that are never record-based)
  47 +# Or use oleid to avoid same functionality in several files
47 48 # - think about integrating this with olefile itself
48 49  
49 50 # -----------------------------------------------------------------------------
... ... @@ -62,6 +63,18 @@ import logging
62 63  
63 64 import olefile
64 65  
  66 +try:
  67 + from oletools.common.errors import FileIsEncryptedError
  68 +except ImportError:
  69 + # little hack to allow absolute imports even if oletools is not installed.
  70 + PARENT_DIR = os.path.normpath(os.path.dirname(os.path.dirname(
  71 + os.path.abspath(__file__))))
  72 + if PARENT_DIR not in sys.path:
  73 + sys.path.insert(0, PARENT_DIR)
  74 + del PARENT_DIR
  75 + from oletools.common.errors import FileIsEncryptedError
  76 +from oletools import oleid
  77 +
65 78  
66 79 ###############################################################################
67 80 # Helpers
... ... @@ -111,6 +124,12 @@ class OleRecordFile(olefile.OleFileIO):
111 124 Subclass of OleFileIO!
112 125 """
113 126  
  127 + def open(self, filename, *args, **kwargs):
  128 + """Call OleFileIO.open and record whether the file is encrypted."""
  129 + #super(OleRecordFile, self).open(filename, *args, **kwargs)
  130 + OleFileIO.open(self, filename, *args, **kwargs)
  131 + self.is_encrypted = oleid.OleID(self).check_encrypted().value
  132 +
114 133 @classmethod
115 134 def stream_class_for_name(cls, stream_name):
116 135 """ helper for iter_streams, must be overwritten in subclasses
... ... @@ -142,7 +161,8 @@ class OleRecordFile(olefile.OleFileIO):
142 161 stream = clz(self._open(direntry.isectStart, direntry.size),
143 162 direntry.size,
144 163 None if is_orphan else direntry.name,
145   - direntry.entry_type)
  164 + direntry.entry_type,
  165 + self.is_encrypted)
146 166 yield stream
147 167 stream.close()
148 168  
... ... @@ -155,13 +175,14 @@ class OleRecordStream(object):
155 175 abstract base class
156 176 """
157 177  
158   - def __init__(self, stream, size, name, stream_type):
  178 + def __init__(self, stream, size, name, stream_type, is_encrypted=False):
159 179 self.stream = stream
160 180 self.size = size
161 181 self.name = name
162 182 if stream_type not in ENTRY_TYPE2STR:
163 183 raise ValueError('Unknown stream type: {0}'.format(stream_type))
164 184 self.stream_type = stream_type
  185 + self.is_encrypted = is_encrypted
165 186  
166 187 def read_record_head(self):
167 188 """ read first few bytes of record to determine size and type
... ... @@ -190,6 +211,9 @@ class OleRecordStream(object):
190 211  
191 212 Stream must be positioned at start of records (e.g. start of stream).
192 213 """
  214 + if self.is_encrypted:
  215 + raise FileIsEncryptedError()
  216 +
193 217 while True:
194 218 # unpacking as in olevba._extract_vba
195 219 pos = self.stream.tell()
... ... @@ -234,6 +258,8 @@ class OleSummaryInformationStream(OleRecordStream):
234 258  
235 259 Do nothing so far. OleFileIO reads quite some info from this. For more info
236 260 see [MS-OSHARED] 2.3.3 and [MS-OLEPS] 2.21 and references therein.
  261 +
  262 + See also: info read in oleid.py.
237 263 """
238 264 def iter_records(self, fill_data=False):
239 265 """ yields nothing, stops at once """
... ...
oletools/xls_parser.py
... ... @@ -86,14 +86,16 @@ def is_xls(filename):
86 86 returns True if given file is an ole file and contains a Workbook stream
87 87  
88 88 todo: could further check that workbook stream starts with a globals
89   - substream
  89 + substream.
  90 + See also: oleid.OleID.check_excel
90 91 """
91 92 try:
92 93 for stream in XlsFile(filename).iter_streams():
93 94 if isinstance(stream, WorkbookStream):
94 95 return True
95 96 except Exception:
96   - return False
  97 + pass
  98 + return False
97 99  
98 100  
99 101 def read_unicode(data, start_idx, n_chars):
... ... @@ -130,6 +132,8 @@ class XlsFile(record_base.OleRecordFile):
130 132 @classmethod
131 133 def stream_class_for_name(cls, stream_name):
132 134 """ helper for iter_streams """
  135 + if stream_name == 'Workbook':
  136 + return WorkbookStream
133 137 return XlsStream
134 138  
135 139  
... ...
tests/msodde/test_basic.py
... ... @@ -11,6 +11,7 @@ from __future__ import print_function
11 11 import unittest
12 12 from oletools import msodde
13 13 from tests.test_utils import DATA_BASE_DIR as BASE_DIR
  14 +import os
14 15 from os.path import join
15 16 from traceback import print_exc
16 17  
... ... @@ -55,6 +56,20 @@ class TestReturnCode(unittest.TestCase):
55 56 """ check that text file argument leads to non-zero exit status """
56 57 self.do_test_validity(join(BASE_DIR, 'basic/text'), True)
57 58  
  59 + def test_encrypted(self):
  60 + """
  61 + check that encrypted files lead to non-zero exit status
  62 +
  63 + Currently, only the encryption applied by Office 2010 (CryptoApi RC4
  64 + Encryption) is tested.
  65 + """
  66 + CRYPT_DIR = join(BASE_DIR, 'encrypted')
  67 + ADD_ARGS = '', '-j', '-d', '-f', '-a'
  68 + for filename in os.listdir(CRYPT_DIR):
  69 + full_name = join(CRYPT_DIR, filename)
  70 + for args in ADD_ARGS:
  71 + self.do_test_validity(args + ' ' + full_name, True)
  72 +
58 73 def do_test_validity(self, args, expect_error=False):
59 74 """ helper for test_valid_doc[x] """
60 75 have_exception = False
... ...
tests/oleid/test_basic.py 0 → 100644
  1 +"""
  2 +Test basic functionality of oleid
  3 +
  4 +Should work with python2 and python3!
  5 +"""
  6 +
  7 +import unittest
  8 +import os
  9 +from os.path import join, relpath, splitext
  10 +from oletools import oleid
  11 +
  12 +# Directory with test data, independent of current working directory
  13 +from tests.test_utils import DATA_BASE_DIR
  14 +
  15 +
  16 +class TestOleIDBasic(unittest.TestCase):
  17 + """Test basic functionality of OleID"""
  18 +
  19 + def test_all(self):
  20 + """Run all files in test-data through oleid and compare to known output"""
  21 + # this relies on order of indicators being constant, could relax that
  22 + # Also requires that files have the correct suffixes (no rtf in doc)
  23 + NON_OLE_SUFFIXES = ('.xml', '.csv', '.rtf', '')
  24 + NON_OLE_VALUES = (False, )
  25 + WORD = b'Microsoft Office Word'
  26 + PPT = b'Microsoft Office PowerPoint'
  27 + EXCEL = b'Microsoft Excel'
  28 + CRYPT = (True, False, 'unknown', True, False, False, False, False,
  29 + False, False, 0)
  30 + OLE_VALUES = {
  31 + 'oleobj/sample_with_lnk_file.doc': (True, True, WORD, False, True,
  32 + False, False, False, False,
  33 + True, 0),
  34 + 'oleobj/embedded-simple-2007.xlsb': (False,),
  35 + 'oleobj/embedded-simple-2007.docm': (False,),
  36 + 'oleobj/embedded-simple-2007.xltx': (False,),
  37 + 'oleobj/embedded-simple-2007.xlam': (False,),
  38 + 'oleobj/embedded-simple-2007.dotm': (False,),
  39 + 'oleobj/sample_with_lnk_file.ppt': (True, True, PPT, False, False,
  40 + False, False, True, False,
  41 + False, 0),
  42 + 'oleobj/embedded-simple-2007.xlsx': (False,),
  43 + 'oleobj/embedded-simple-2007.xlsm': (False,),
  44 + 'oleobj/embedded-simple-2007.ppsx': (False,),
  45 + 'oleobj/embedded-simple-2007.pps': (True, True, PPT, False, False,
  46 + False, False, True, False,
  47 + False, 0),
  48 + 'oleobj/embedded-simple-2007.xla': (True, True, EXCEL, False,
  49 + False, False, True, False,
  50 + False, False, 0),
  51 + 'oleobj/sample_with_calc_embedded.doc': (True, True, WORD, False,
  52 + True, False, False, False,
  53 + False, True, 0),
  54 + 'oleobj/embedded-unicode-2007.docx': (False,),
  55 + 'oleobj/embedded-unicode.doc': (True, True, WORD, False, True,
  56 + False, False, False, False, True,
  57 + 0),
  58 + 'oleobj/embedded-simple-2007.doc': (True, True, WORD, False, True,
  59 + False, False, False, False,
  60 + True, 0),
  61 + 'oleobj/embedded-simple-2007.xls': (True, True, EXCEL, False,
  62 + False, False, True, False,
  63 + False, False, 0),
  64 + 'oleobj/embedded-simple-2007.dot': (True, True, WORD, False, True,
  65 + False, False, False, False,
  66 + True, 0),
  67 + 'oleobj/sample_with_lnk_to_calc.doc': (True, True, WORD, False,
  68 + True, False, False, False,
  69 + False, True, 0),
  70 + 'oleobj/embedded-simple-2007.ppt': (True, True, PPT, False, False,
  71 + False, False, True, False,
  72 + False, 0),
  73 + 'oleobj/sample_with_lnk_file.pps': (True, True, PPT, False, False,
  74 + False, False, True, False,
  75 + False, 0),
  76 + 'oleobj/embedded-simple-2007.pptx': (False,),
  77 + 'oleobj/embedded-simple-2007.ppsm': (False,),
  78 + 'oleobj/embedded-simple-2007.dotx': (False,),
  79 + 'oleobj/embedded-simple-2007.pptm': (False,),
  80 + 'oleobj/embedded-simple-2007.xlt': (True, True, EXCEL, False,
  81 + False, False, True, False,
  82 + False, False, 0),
  83 + 'oleobj/embedded-simple-2007.docx': (False,),
  84 + 'oleobj/embedded-simple-2007.potx': (False,),
  85 + 'oleobj/embedded-simple-2007.pot': (True, True, PPT, False, False,
  86 + False, False, True, False,
  87 + False, 0),
  88 + 'oleobj/embedded-simple-2007.xltm': (False,),
  89 + 'oleobj/embedded-simple-2007.potm': (False,),
  90 + 'encrypted/encrypted.xlsx': CRYPT,
  91 + 'encrypted/encrypted.docm': CRYPT,
  92 + 'encrypted/encrypted.docx': CRYPT,
  93 + 'encrypted/encrypted.pptm': CRYPT,
  94 + 'encrypted/encrypted.xlsb': CRYPT,
  95 + 'encrypted/encrypted.xls': (True, True, EXCEL, True, False, False,
  96 + True, False, False, False, 0),
  97 + 'encrypted/encrypted.ppt': (True, False, 'unknown', True, False,
  98 + False, False, True, False, False, 0),
  99 + 'encrypted/encrypted.pptx': CRYPT,
  100 + 'encrypted/encrypted.xlsm': CRYPT,
  101 + 'encrypted/encrypted.doc': (True, True, WORD, True, True, False,
  102 + False, False, False, False, 0),
  103 + 'msodde/harmless-clean.docm': (False,),
  104 + 'msodde/dde-in-csv.csv': (False,),
  105 + 'msodde/dde-test-from-office2013-utf_16le-korean.doc':
  106 + (True, True, WORD, False, True, False, False, False, False,
  107 + False, 0),
  108 + 'msodde/harmless-clean.doc': (True, True, WORD, False, True, False,
  109 + False, False, False, False, 0),
  110 + 'msodde/dde-test.docm': (False,),
  111 + 'msodde/dde-test.xlsb': (False,),
  112 + 'msodde/dde-test.xlsm': (False,),
  113 + 'msodde/dde-test.docx': (False,),
  114 + 'msodde/dde-test.xlsx': (False,),
  115 + 'msodde/dde-test-from-office2003.doc': (True, True, WORD, False,
  116 + True, False, False, False,
  117 + False, False, 0),
  118 + 'msodde/dde-test-from-office2016.doc': (True, True, WORD, False,
  119 + True, False, False, False,
  120 + False, False, 0),
  121 + 'msodde/harmless-clean.docx': (False,),
  122 + 'oleform/oleform-PR314.docm': (False,),
  123 + 'basic/encrypted.docx': CRYPT,
  124 + }
  125 +
  126 + indicator_names = []
  127 + for base_dir, _, files in os.walk(DATA_BASE_DIR):
  128 + for filename in files:
  129 + full_path = join(base_dir, filename)
  130 + name = relpath(full_path, DATA_BASE_DIR)
  131 + values = tuple(indicator.value for indicator in
  132 + oleid.OleID(full_path).check())
  133 + if len(indicator_names) < 2: # not initialized with ole yet
  134 + indicator_names = tuple(indicator.name for indicator in
  135 + oleid.OleID(full_path).check())
  136 + suffix = splitext(filename)[1]
  137 + if suffix in NON_OLE_SUFFIXES:
  138 + self.assertEqual(values, NON_OLE_VALUES,
  139 + msg='For non-ole file {} expected {}, '
  140 + 'not {}'.format(name, NON_OLE_VALUES,
  141 + values))
  142 + continue
  143 + try:
  144 + self.assertEqual(values, OLE_VALUES[name],
  145 + msg='Wrong detail values for {}:\n'
  146 + ' Names {}\n Found {}\n Expect {}'
  147 + .format(name, indicator_names, values,
  148 + OLE_VALUES[name]))
  149 + except KeyError:
  150 + print('Should add oleid output for {} to {} ({})'
  151 + .format(name, __name__, values[3:]))
  152 +
  153 +# just in case somebody calls this file as a script
  154 +if __name__ == '__main__':
  155 + unittest.main()
... ...
tests/olevba/__init__.py 0 → 100644
tests/olevba/test_basic.py 0 → 100644
  1 +"""
  2 +Test basic functionality of olevba[3]
  3 +"""
  4 +
  5 +import unittest
  6 +import sys
  7 +if sys.version_info.major <= 2:
  8 + from oletools import olevba
  9 +else:
  10 + from oletools import olevba3 as olevba
  11 +import os
  12 +from os.path import join
  13 +
  14 +# Directory with test data, independent of current working directory
  15 +from tests.test_utils import DATA_BASE_DIR
  16 +
  17 +
  18 +class TestOlevbaBasic(unittest.TestCase):
  19 + """Tests olevba basic functionality"""
  20 +
  21 + def test_crypt_return(self):
  22 + """
  23 + Tests that encrypted files give a certain return code.
  24 +
  25 + Currently, only the encryption applied by Office 2010 (CryptoApi RC4
  26 + Encryption) is tested.
  27 + """
  28 + CRYPT_DIR = join(DATA_BASE_DIR, 'encrypted')
  29 + CRYPT_RETURN_CODE = 9
  30 + ADD_ARGS = [], ['-d', ], ['-a', ], ['-j', ], ['-t', ]
  31 + for filename in os.listdir(CRYPT_DIR):
  32 + full_name = join(CRYPT_DIR, filename)
  33 + for args in ADD_ARGS:
  34 + try:
  35 + ret_code = olevba.main(args + [full_name, ])
  36 + except SystemExit as se:
  37 + ret_code = se.code or 0 # se.code can be None
  38 + self.assertEqual(ret_code, CRYPT_RETURN_CODE,
  39 + msg='Wrong return code {} for args {}'
  40 + .format(ret_code, args + [filename, ]))
  41 +
  42 +
  43 +# just in case somebody calls this file as a script
  44 +if __name__ == '__main__':
  45 + unittest.main()
... ...
tests/test-data/encrypted/encrypted.doc 0 → 100644
No preview for this file type
tests/test-data/encrypted/encrypted.docm 0 → 100644
No preview for this file type
tests/test-data/encrypted/encrypted.docx 0 → 100644
No preview for this file type
tests/test-data/encrypted/encrypted.ppt 0 → 100644
No preview for this file type
tests/test-data/encrypted/encrypted.pptm 0 → 100644
No preview for this file type
tests/test-data/encrypted/encrypted.pptx 0 → 100644
No preview for this file type
tests/test-data/encrypted/encrypted.xls 0 → 100644
No preview for this file type
tests/test-data/encrypted/encrypted.xlsb 0 → 100644
No preview for this file type
tests/test-data/encrypted/encrypted.xlsm 0 → 100644
No preview for this file type
tests/test-data/encrypted/encrypted.xlsx 0 → 100644
No preview for this file type