Commit 461d78ed3919862d05c37516ba6497749789ff00

Authored by decalage2
1 parent 66020d44

oleid: added risk levels, use tablestream with colors, use olevba+mraptor to check VBA macros (#679)

Showing 1 changed file with 128 additions and 39 deletions
oletools/oleid.py
@@ -60,7 +60,7 @@ from __future__ import print_function @@ -60,7 +60,7 @@ from __future__ import print_function
60 # improve encryption detection for ppt 60 # improve encryption detection for ppt
61 # 2021-05-07 v0.56.2 MN: - fixed bug in check_excel (issue #584, PR #585) 61 # 2021-05-07 v0.56.2 MN: - fixed bug in check_excel (issue #584, PR #585)
62 62
63 -__version__ = '0.56.2' 63 +__version__ = '0.60.dev1'
64 64
65 65
66 #------------------------------------------------------------------------------ 66 #------------------------------------------------------------------------------
@@ -81,8 +81,7 @@ __version__ = '0.56.2' @@ -81,8 +81,7 @@ __version__ = '0.56.2'
81 81
82 #=== IMPORTS ================================================================= 82 #=== IMPORTS =================================================================
83 83
84 -import argparse, sys, re, zlib, struct, os  
85 -from os.path import dirname, abspath 84 +import argparse, sys, re, zlib, struct, os, io
86 85
87 import olefile 86 import olefile
88 87
@@ -98,10 +97,37 @@ _parent_dir = os.path.normpath(os.path.join(_thismodule_dir, '..')) @@ -98,10 +97,37 @@ _parent_dir = os.path.normpath(os.path.join(_thismodule_dir, '..'))
98 if _parent_dir not in sys.path: 97 if _parent_dir not in sys.path:
99 sys.path.insert(0, _parent_dir) 98 sys.path.insert(0, _parent_dir)
100 99
101 -from oletools.thirdparty.prettytable import prettytable  
102 -from oletools import crypto 100 +from oletools.thirdparty.tablestream import tablestream
  101 +from oletools import crypto, ftguess, olevba, mraptor
  102 +from oletools.common.log_helper import log_helper
103 103
  104 +# === LOGGING =================================================================
104 105
  106 +log = log_helper.get_or_create_silent_logger('oleid')
  107 +
  108 +# === CONSTANTS ===============================================================
  109 +
class RISK(object):
    """
    Risk levels that can be attached to an indicator.

    Each level is a plain string, used both as the value stored on an
    indicator and as the text displayed for it.
    """
    # actual risk levels, from most to least severe:
    HIGH = 'HIGH'
    MEDIUM = 'Medium'
    LOW = 'low'
    NONE = 'none'
    # purely informational indicator, no risk attached:
    INFO = 'info'
    # risk could not be determined:
    UNKNOWN = 'Unknown'
    # a check triggered an unexpected error:
    ERROR = 'Error'
  121 +
# Display color associated with each risk level.
# None means that no specific color is set for that level.
risk_color = dict([
    (RISK.HIGH, 'red'),
    (RISK.MEDIUM, 'yellow'),
    (RISK.LOW, 'white'),
    (RISK.NONE, 'green'),
    (RISK.INFO, 'cyan'),
    (RISK.UNKNOWN, None),
    (RISK.ERROR, None),
])
105 131
106 #=== FUNCTIONS =============================================================== 132 #=== FUNCTIONS ===============================================================
107 133
@@ -166,7 +192,7 @@ class Indicator(object): @@ -166,7 +192,7 @@ class Indicator(object):
166 """ 192 """
167 193
168 def __init__(self, _id, value=None, _type=bool, name=None, 194 def __init__(self, _id, value=None, _type=bool, name=None,
169 - description=None): 195 + description=None, risk=RISK.UNKNOWN, hide_if_false=True):
170 self.id = _id 196 self.id = _id
171 self.value = value 197 self.value = value
172 self.type = _type 198 self.type = _type
@@ -174,17 +200,19 @@ class Indicator(object): @@ -174,17 +200,19 @@ class Indicator(object):
174 if name == None: 200 if name == None:
175 self.name = _id 201 self.name = _id
176 self.description = description 202 self.description = description
  203 + self.risk = risk
  204 + self.hide_if_false = hide_if_false
177 205
178 206
179 class OleID(object): 207 class OleID(object):
180 """ 208 """
181 - Summary of information about an OLE file 209 + Summary of information about an OLE file (and a few other MS Office formats)
182 210
183 Call :py:meth:`OleID.check` to gather all info on a given file or run one 211 Call :py:meth:`OleID.check` to gather all info on a given file or run one
184 of the `check_` functions to just get a specific piece of info. 212 of the `check_` functions to just get a specific piece of info.
185 """ 213 """
186 214
187 - def __init__(self, input_file): 215 + def __init__(self, filename=None, data=None):
188 """ 216 """
189 Create an OleID object 217 Create an OleID object
190 218
@@ -199,11 +227,17 @@ class OleID(object): @@ -199,11 +227,17 @@ class OleID(object):
199 If filename is given, only :py:meth:`OleID.check` opens the file. Other 227 If filename is given, only :py:meth:`OleID.check` opens the file. Other
200 functions will return None 228 functions will return None
201 """ 229 """
202 - if isinstance(input_file, olefile.OleFileIO):  
203 - self.ole = input_file 230 + if filename is None and data is None:
  231 + raise ValueError('OleID requires either a file path or file data, or both')
  232 + if data is None:
  233 + with open(filename, 'rb') as f:
  234 + self.data = f.read()
  235 + self.data_bytesio = io.BytesIO(self.data)
  236 + if isinstance(filename, olefile.OleFileIO):
  237 + self.ole = filename
204 self.filename = None 238 self.filename = None
205 else: 239 else:
206 - self.filename = input_file 240 + self.filename = filename
207 self.ole = None 241 self.ole = None
208 self.indicators = [] 242 self.indicators = []
209 self.suminfo_data = None 243 self.suminfo_data = None
@@ -214,24 +248,37 @@ class OleID(object): @@ -214,24 +248,37 @@ class OleID(object):
214 248
215 :returns: list of all :py:class:`Indicator`s created 249 :returns: list of all :py:class:`Indicator`s created
216 """ 250 """
  251 + self.ftg = ftguess.FileTypeGuesser(filepath=self.filename, data=self.data)
  252 + ftype = self.ftg.ftype
  253 + ft = Indicator('ftype', value=ftype.longname, _type=str, name='File format', risk=RISK.INFO)
  254 + self.indicators.append(ft)
  255 + ct = Indicator('container', value=ftype.container, _type=str, name='Container format', risk=RISK.INFO)
  256 + self.indicators.append(ct)
  257 +
217 # check if it is actually an OLE file: 258 # check if it is actually an OLE file:
218 - oleformat = Indicator('ole_format', True, name='OLE format')  
219 - self.indicators.append(oleformat)  
220 - if self.ole:  
221 - oleformat.value = True  
222 - elif not olefile.isOleFile(self.filename):  
223 - oleformat.value = False  
224 - return self.indicators  
225 - else:  
226 - # parse file:  
227 - self.ole = olefile.OleFileIO(self.filename) 259 + if self.ftg.container == ftguess.CONTAINER.OLE:
  260 + # reuse olefile already opened by ftguess
  261 + self.ole = self.ftg.olefile
  262 + # oleformat = Indicator('ole_format', True, name='OLE format')
  263 + # self.indicators.append(oleformat)
  264 + # if self.ole:
  265 + # oleformat.value = True
  266 + # elif not olefile.isOleFile(self.filename):
  267 + # oleformat.value = False
  268 + # return self.indicators
  269 + # else:
  270 + # # parse file:
  271 + # self.ole = olefile.OleFileIO(self.filename)
  272 +
228 # checks: 273 # checks:
  274 + # TODO: add try/except around each check
229 self.check_properties() 275 self.check_properties()
230 self.check_encrypted() 276 self.check_encrypted()
231 - self.check_word()  
232 - self.check_excel()  
233 - self.check_powerpoint()  
234 - self.check_visio() 277 + # self.check_word()
  278 + # self.check_excel()
  279 + # self.check_powerpoint()
  280 + # self.check_visio()
  281 + self.check_macros()
235 self.check_object_pool() 282 self.check_object_pool()
236 self.check_flash() 283 self.check_flash()
237 self.ole.close() 284 self.ole.close()
@@ -244,6 +291,7 @@ class OleID(object): @@ -244,6 +291,7 @@ class OleID(object):
244 :returns: 2 :py:class:`Indicator`s (for presence of summary info and 291 :returns: 2 :py:class:`Indicator`s (for presence of summary info and
245 application name) or None if file was not opened 292 application name) or None if file was not opened
246 """ 293 """
  294 + # TODO: use get_metadata
247 suminfo = Indicator('has_suminfo', False, 295 suminfo = Indicator('has_suminfo', False,
248 name='Has SummaryInformation stream') 296 name='Has SummaryInformation stream')
249 self.indicators.append(suminfo) 297 self.indicators.append(suminfo)
@@ -280,11 +328,17 @@ class OleID(object): @@ -280,11 +328,17 @@ class OleID(object):
280 opened 328 opened
281 """ 329 """
282 # we keep the pointer to the indicator, can be modified by other checks: 330 # we keep the pointer to the indicator, can be modified by other checks:
283 - encrypted = Indicator('encrypted', False, name='Encrypted') 331 + encrypted = Indicator('encrypted', False, name='Encrypted',
  332 + risk=RISK.NONE,
  333 + description='The file is not encrypted',
  334 + hide_if_false=False)
284 self.indicators.append(encrypted) 335 self.indicators.append(encrypted)
285 if not self.ole: 336 if not self.ole:
286 return None 337 return None
287 - encrypted.value = crypto.is_encrypted(self.ole) 338 + if crypto.is_encrypted(self.ole):
  339 + encrypted.value = True
  340 + encrypted.risk = RISK.LOW
  341 + encrypted.description = 'The file is encrypted. It may be decrypted with msoffcrypto-tool'
288 return encrypted 342 return encrypted
289 343
290 def check_word(self): 344 def check_word(self):
@@ -302,7 +356,7 @@ class OleID(object): @@ -302,7 +356,7 @@ class OleID(object):
302 description='Contains a WordDocument stream, very likely to be a ' 356 description='Contains a WordDocument stream, very likely to be a '
303 'Microsoft Word Document.') 357 'Microsoft Word Document.')
304 self.indicators.append(word) 358 self.indicators.append(word)
305 - macros = Indicator('vba_macros', False, name='VBA Macros') 359 + macros = Indicator('vba_macros', False, name='VBA Macros', risk=RISK.MEDIUM)
306 self.indicators.append(macros) 360 self.indicators.append(macros)
307 if not self.ole: 361 if not self.ole:
308 return None, None 362 return None, None
@@ -400,6 +454,36 @@ class OleID(object): @@ -400,6 +454,36 @@ class OleID(object):
400 objpool.value = True 454 objpool.value = True
401 return objpool 455 return objpool
402 456
  457 + def check_macros(self):
  458 + """
  459 + Check whether this file contains macros (VBA and XLM/Excel 4).
  460 +
  461 + :returns: :py:class:`Indicator`
  462 + """
  463 + vba_indicator = Indicator(_id='vba', value='No', _type=str, name='VBA Macros',
  464 + description='This file does not contain VBA macros.',
  465 + risk=RISK.NONE)
  466 + try:
  467 + vba_parser = olevba.VBA_Parser(filename=self.filename, data=self.data)
  468 + if vba_parser.detect_vba_macros():
  469 + vba_indicator.value = 'Yes'
  470 + vba_indicator.risk = RISK.MEDIUM
  471 + vba_indicator.description = 'This file contains VBA macros. No suspicious keyword was found. Use olevba and mraptor for more info.'
  472 + # check code with mraptor
  473 + vba_code = vba_parser.get_vba_code_all_modules()
  474 + m = mraptor.MacroRaptor(vba_code)
  475 + m.scan()
  476 + if m.suspicious:
  477 + vba_indicator.value = 'Yes, suspicious'
  478 + vba_indicator.risk = RISK.HIGH
  479 + vba_indicator.description = 'This file contains VBA macros. Suspicious keywords were found. Use olevba and mraptor for more info.'
  480 + except Exception as e:
  481 + vba_indicator.risk = RISK.ERROR
  482 + vba_indicator.value = 'Error'
  483 + vba_indicator.description = 'Error while checking VBA macros: %s' % str(e)
  484 + self.indicators.append(vba_indicator)
  485 + return vba_indicator
  486 +
403 def check_flash(self): 487 def check_flash(self):
404 """ 488 """
405 Check whether this file contains flash objects 489 Check whether this file contains flash objects
@@ -407,11 +491,13 @@ class OleID(object): @@ -407,11 +491,13 @@ class OleID(object):
407 :returns: :py:class:`Indicator` for count of flash objects or None if 491 :returns: :py:class:`Indicator` for count of flash objects or None if
408 file was not opened 492 file was not opened
409 """ 493 """
  494 + # TODO: add support for RTF and OpenXML formats
410 flash = Indicator( 495 flash = Indicator(
411 'flash', 0, _type=int, name='Flash objects', 496 'flash', 0, _type=int, name='Flash objects',
412 description='Number of embedded Flash objects (SWF files) detected ' 497 description='Number of embedded Flash objects (SWF files) detected '
413 'in OLE streams. Not 100% accurate, there may be false ' 498 'in OLE streams. Not 100% accurate, there may be false '
414 - 'positives.') 499 + 'positives.',
  500 + risk=RISK.NONE)
415 self.indicators.append(flash) 501 self.indicators.append(flash)
416 if not self.ole: 502 if not self.ole:
417 return None 503 return None
@@ -421,6 +507,8 @@ class OleID(object): @@ -421,6 +507,8 @@ class OleID(object):
421 # just add to the count of Flash objects: 507 # just add to the count of Flash objects:
422 flash.value += len(found) 508 flash.value += len(found)
423 #print stream, found 509 #print stream, found
  510 + if flash.value > 0:
  511 + flash.risk = RISK.MEDIUM
424 return flash 512 return flash
425 513
426 514
@@ -449,6 +537,8 @@ def main(): @@ -449,6 +537,8 @@ def main():
449 parser.print_help() 537 parser.print_help()
450 return 538 return
451 539
  540 + log_helper.enable_logging()
  541 +
452 for filename in args.input: 542 for filename in args.input:
453 print('Filename:', filename) 543 print('Filename:', filename)
454 oleid = OleID(filename) 544 oleid = OleID(filename)
@@ -456,17 +546,16 @@ def main(): @@ -456,17 +546,16 @@ def main():
456 546
457 #TODO: add description 547 #TODO: add description
458 #TODO: highlight suspicious indicators 548 #TODO: highlight suspicious indicators
459 - table = prettytable.PrettyTable(['Indicator', 'Value'])  
460 - table.align = 'l'  
461 - table.max_width = 39  
462 - table.border = False  
463 - 549 + table = tablestream.TableStream([20, 20, 10, 26],
  550 + header_row=['Indicator', 'Value', 'Risk', 'Description'],
  551 + style=tablestream.TableStyleSlimSep)
464 for indicator in indicators: 552 for indicator in indicators:
465 - #print '%s: %s' % (indicator.name, indicator.value)  
466 - table.add_row((indicator.name, indicator.value))  
467 -  
468 - print(table)  
469 - print('') 553 + if not (indicator.hide_if_false and not indicator.value):
  554 + #print '%s: %s' % (indicator.name, indicator.value)
  555 + color = risk_color.get(indicator.risk, None)
  556 + table.write_row((indicator.name, indicator.value, indicator.risk, indicator.description),
  557 + colors=(color, color, color, None))
  558 + table.close()
470 559
471 if __name__ == '__main__': 560 if __name__ == '__main__':
472 main() 561 main()