Commit 461d78ed3919862d05c37516ba6497749789ff00

Authored by decalage2
1 parent 66020d44

oleid: added risk levels, use tablestream with colors, use olevba+mraptor to check VBA macros (#679)

Showing 1 changed file with 128 additions and 39 deletions
oletools/oleid.py
... ... @@ -60,7 +60,7 @@ from __future__ import print_function
60 60 # improve encryption detection for ppt
61 61 # 2021-05-07 v0.56.2 MN: - fixed bug in check_excel (issue #584, PR #585)
62 62  
63   -__version__ = '0.56.2'
  63 +__version__ = '0.60.dev1'
64 64  
65 65  
66 66 #------------------------------------------------------------------------------
... ... @@ -81,8 +81,7 @@ __version__ = '0.56.2'
81 81  
82 82 #=== IMPORTS =================================================================
83 83  
84   -import argparse, sys, re, zlib, struct, os
85   -from os.path import dirname, abspath
  84 +import argparse, sys, re, zlib, struct, os, io
86 85  
87 86 import olefile
88 87  
... ... @@ -98,10 +97,37 @@ _parent_dir = os.path.normpath(os.path.join(_thismodule_dir, '..'))
98 97 if _parent_dir not in sys.path:
99 98 sys.path.insert(0, _parent_dir)
100 99  
101   -from oletools.thirdparty.prettytable import prettytable
102   -from oletools import crypto
  100 +from oletools.thirdparty.tablestream import tablestream
  101 +from oletools import crypto, ftguess, olevba, mraptor
  102 +from oletools.common.log_helper import log_helper
103 103  
  104 +# === LOGGING =================================================================
104 105  
  106 +log = log_helper.get_or_create_silent_logger('oleid')
  107 +
  108 +# === CONSTANTS ===============================================================
  109 +
class RISK(object):
    """
    Namespace of string constants naming the risk level attached to an
    indicator.

    Each value is a plain string; the same strings are used as keys of the
    module-level ``risk_color`` mapping.
    """
    HIGH = 'HIGH'
    MEDIUM = 'Medium'
    LOW = 'low'
    NONE = 'none'
    INFO = 'info'
    UNKNOWN = 'Unknown'
    # Used when a check triggered an unexpected error
    ERROR = 'Error'
  121 +
# Maps each RISK level to the color name used when displaying it.
# None means that no specific color is associated with that level.
risk_color = {
    RISK.HIGH: 'red',
    RISK.MEDIUM: 'yellow',
    RISK.LOW: 'white',
    RISK.NONE: 'green',
    RISK.INFO: 'cyan',
    RISK.UNKNOWN: None,
    RISK.ERROR: None,
}
105 131  
106 132 #=== FUNCTIONS ===============================================================
107 133  
... ... @@ -166,7 +192,7 @@ class Indicator(object):
166 192 """
167 193  
168 194 def __init__(self, _id, value=None, _type=bool, name=None,
169   - description=None):
  195 + description=None, risk=RISK.UNKNOWN, hide_if_false=True):
170 196 self.id = _id
171 197 self.value = value
172 198 self.type = _type
... ... @@ -174,17 +200,19 @@ class Indicator(object):
174 200 if name == None:
175 201 self.name = _id
176 202 self.description = description
  203 + self.risk = risk
  204 + self.hide_if_false = hide_if_false
177 205  
178 206  
179 207 class OleID(object):
180 208 """
181   - Summary of information about an OLE file
  209 + Summary of information about an OLE file (and a few other MS Office formats)
182 210  
183 211 Call :py:meth:`OleID.check` to gather all info on a given file or run one
184 212 of the `check_` functions to just get a specific piece of info.
185 213 """
186 214  
187   - def __init__(self, input_file):
  215 + def __init__(self, filename=None, data=None):
188 216 """
189 217 Create an OleID object
190 218  
... ... @@ -199,11 +227,17 @@ class OleID(object):
199 227 If filename is given, only :py:meth:`OleID.check` opens the file. Other
200 228 functions will return None
201 229 """
202   - if isinstance(input_file, olefile.OleFileIO):
203   - self.ole = input_file
  230 + if filename is None and data is None:
  231 + raise ValueError('OleID requires either a file path or file data, or both')
  232 + if data is None:
  233 + with open(filename, 'rb') as f:
  234 + self.data = f.read()
  235 + self.data_bytesio = io.BytesIO(self.data)
  236 + if isinstance(filename, olefile.OleFileIO):
  237 + self.ole = filename
204 238 self.filename = None
205 239 else:
206   - self.filename = input_file
  240 + self.filename = filename
207 241 self.ole = None
208 242 self.indicators = []
209 243 self.suminfo_data = None
... ... @@ -214,24 +248,37 @@ class OleID(object):
214 248  
215 249 :returns: list of all :py:class:`Indicator`s created
216 250 """
  251 + self.ftg = ftguess.FileTypeGuesser(filepath=self.filename, data=self.data)
  252 + ftype = self.ftg.ftype
  253 + ft = Indicator('ftype', value=ftype.longname, _type=str, name='File format', risk=RISK.INFO)
  254 + self.indicators.append(ft)
  255 + ct = Indicator('container', value=ftype.container, _type=str, name='Container format', risk=RISK.INFO)
  256 + self.indicators.append(ct)
  257 +
217 258 # check if it is actually an OLE file:
218   - oleformat = Indicator('ole_format', True, name='OLE format')
219   - self.indicators.append(oleformat)
220   - if self.ole:
221   - oleformat.value = True
222   - elif not olefile.isOleFile(self.filename):
223   - oleformat.value = False
224   - return self.indicators
225   - else:
226   - # parse file:
227   - self.ole = olefile.OleFileIO(self.filename)
  259 + if self.ftg.container == ftguess.CONTAINER.OLE:
  260 + # reuse olefile already opened by ftguess
  261 + self.ole = self.ftg.olefile
  262 + # oleformat = Indicator('ole_format', True, name='OLE format')
  263 + # self.indicators.append(oleformat)
  264 + # if self.ole:
  265 + # oleformat.value = True
  266 + # elif not olefile.isOleFile(self.filename):
  267 + # oleformat.value = False
  268 + # return self.indicators
  269 + # else:
  270 + # # parse file:
  271 + # self.ole = olefile.OleFileIO(self.filename)
  272 +
228 273 # checks:
  274 + # TODO: add try/except around each check
229 275 self.check_properties()
230 276 self.check_encrypted()
231   - self.check_word()
232   - self.check_excel()
233   - self.check_powerpoint()
234   - self.check_visio()
  277 + # self.check_word()
  278 + # self.check_excel()
  279 + # self.check_powerpoint()
  280 + # self.check_visio()
  281 + self.check_macros()
235 282 self.check_object_pool()
236 283 self.check_flash()
237 284 self.ole.close()
... ... @@ -244,6 +291,7 @@ class OleID(object):
244 291 :returns: 2 :py:class:`Indicator`s (for presence of summary info and
245 292 application name) or None if file was not opened
246 293 """
  294 + # TODO: use get_metadata
247 295 suminfo = Indicator('has_suminfo', False,
248 296 name='Has SummaryInformation stream')
249 297 self.indicators.append(suminfo)
... ... @@ -280,11 +328,17 @@ class OleID(object):
280 328 opened
281 329 """
282 330 # we keep the pointer to the indicator, can be modified by other checks:
283   - encrypted = Indicator('encrypted', False, name='Encrypted')
  331 + encrypted = Indicator('encrypted', False, name='Encrypted',
  332 + risk=RISK.NONE,
  333 + description='The file is not encrypted',
  334 + hide_if_false=False)
284 335 self.indicators.append(encrypted)
285 336 if not self.ole:
286 337 return None
287   - encrypted.value = crypto.is_encrypted(self.ole)
  338 + if crypto.is_encrypted(self.ole):
  339 + encrypted.value = True
  340 + encrypted.risk = RISK.LOW
  341 + encrypted.description = 'The file is encrypted. It may be decrypted with msoffcrypto-tool'
288 342 return encrypted
289 343  
290 344 def check_word(self):
... ... @@ -302,7 +356,7 @@ class OleID(object):
302 356 description='Contains a WordDocument stream, very likely to be a '
303 357 'Microsoft Word Document.')
304 358 self.indicators.append(word)
305   - macros = Indicator('vba_macros', False, name='VBA Macros')
  359 + macros = Indicator('vba_macros', False, name='VBA Macros', risk=RISK.MEDIUM)
306 360 self.indicators.append(macros)
307 361 if not self.ole:
308 362 return None, None
... ... @@ -400,6 +454,36 @@ class OleID(object):
400 454 objpool.value = True
401 455 return objpool
402 456  
  457 + def check_macros(self):
  458 + """
  459 + Check whether this file contains macros (VBA and XLM/Excel 4).
  460 +
  461 + :returns: :py:class:`Indicator`
  462 + """
  463 + vba_indicator = Indicator(_id='vba', value='No', _type=str, name='VBA Macros',
  464 + description='This file does not contain VBA macros.',
  465 + risk=RISK.NONE)
  466 + try:
  467 + vba_parser = olevba.VBA_Parser(filename=self.filename, data=self.data)
  468 + if vba_parser.detect_vba_macros():
  469 + vba_indicator.value = 'Yes'
  470 + vba_indicator.risk = RISK.MEDIUM
  471 + vba_indicator.description = 'This file contains VBA macros. No suspicious keyword was found. Use olevba and mraptor for more info.'
  472 + # check code with mraptor
  473 + vba_code = vba_parser.get_vba_code_all_modules()
  474 + m = mraptor.MacroRaptor(vba_code)
  475 + m.scan()
  476 + if m.suspicious:
  477 + vba_indicator.value = 'Yes, suspicious'
  478 + vba_indicator.risk = RISK.HIGH
  479 + vba_indicator.description = 'This file contains VBA macros. Suspicious keywords were found. Use olevba and mraptor for more info.'
  480 + except Exception as e:
  481 + vba_indicator.risk = RISK.ERROR
  482 + vba_indicator.value = 'Error'
  483 + vba_indicator.description = 'Error while checking VBA macros: %s' % str(e)
  484 + self.indicators.append(vba_indicator)
  485 + return vba_indicator
  486 +
403 487 def check_flash(self):
404 488 """
405 489 Check whether this file contains flash objects
... ... @@ -407,11 +491,13 @@ class OleID(object):
407 491 :returns: :py:class:`Indicator` for count of flash objects or None if
408 492 file was not opened
409 493 """
  494 + # TODO: add support for RTF and OpenXML formats
410 495 flash = Indicator(
411 496 'flash', 0, _type=int, name='Flash objects',
412 497 description='Number of embedded Flash objects (SWF files) detected '
413 498 'in OLE streams. Not 100% accurate, there may be false '
414   - 'positives.')
  499 + 'positives.',
  500 + risk=RISK.NONE)
415 501 self.indicators.append(flash)
416 502 if not self.ole:
417 503 return None
... ... @@ -421,6 +507,8 @@ class OleID(object):
421 507 # just add to the count of Flash objects:
422 508 flash.value += len(found)
423 509 #print stream, found
  510 + if flash.value > 0:
  511 + flash.risk = RISK.MEDIUM
424 512 return flash
425 513  
426 514  
... ... @@ -449,6 +537,8 @@ def main():
449 537 parser.print_help()
450 538 return
451 539  
  540 + log_helper.enable_logging()
  541 +
452 542 for filename in args.input:
453 543 print('Filename:', filename)
454 544 oleid = OleID(filename)
... ... @@ -456,17 +546,16 @@ def main():
456 546  
457 547 #TODO: add description
458 548 #TODO: highlight suspicious indicators
459   - table = prettytable.PrettyTable(['Indicator', 'Value'])
460   - table.align = 'l'
461   - table.max_width = 39
462   - table.border = False
463   -
  549 + table = tablestream.TableStream([20, 20, 10, 26],
  550 + header_row=['Indicator', 'Value', 'Risk', 'Description'],
  551 + style=tablestream.TableStyleSlimSep)
464 552 for indicator in indicators:
465   - #print '%s: %s' % (indicator.name, indicator.value)
466   - table.add_row((indicator.name, indicator.value))
467   -
468   - print(table)
469   - print('')
  553 + if not (indicator.hide_if_false and not indicator.value):
  554 + #print '%s: %s' % (indicator.name, indicator.value)
  555 + color = risk_color.get(indicator.risk, None)
  556 + table.write_row((indicator.name, indicator.value, indicator.risk, indicator.description),
  557 + colors=(color, color, color, None))
  558 + table.close()
470 559  
471 560 if __name__ == '__main__':
472 561 main()
... ...