Commit 461d78ed3919862d05c37516ba6497749789ff00
1 parent 66020d44
oleid: added risk levels, use tablestream with colors, use olevba+mraptor to check VBA macros (#679)
Showing 1 changed file with 128 additions and 39 deletions
oletools/oleid.py
| ... | ... | @@ -60,7 +60,7 @@ from __future__ import print_function |
| 60 | 60 | # improve encryption detection for ppt |
| 61 | 61 | # 2021-05-07 v0.56.2 MN: - fixed bug in check_excel (issue #584, PR #585) |
| 62 | 62 | |
| 63 | -__version__ = '0.56.2' | |
| 63 | +__version__ = '0.60.dev1' | |
| 64 | 64 | |
| 65 | 65 | |
| 66 | 66 | #------------------------------------------------------------------------------ |
| ... | ... | @@ -81,8 +81,7 @@ __version__ = '0.56.2' |
| 81 | 81 | |
| 82 | 82 | #=== IMPORTS ================================================================= |
| 83 | 83 | |
| 84 | -import argparse, sys, re, zlib, struct, os | |
| 85 | -from os.path import dirname, abspath | |
| 84 | +import argparse, sys, re, zlib, struct, os, io | |
| 86 | 85 | |
| 87 | 86 | import olefile |
| 88 | 87 | |
| ... | ... | @@ -98,10 +97,37 @@ _parent_dir = os.path.normpath(os.path.join(_thismodule_dir, '..')) |
| 98 | 97 | if _parent_dir not in sys.path: |
| 99 | 98 | sys.path.insert(0, _parent_dir) |
| 100 | 99 | |
| 101 | -from oletools.thirdparty.prettytable import prettytable | |
| 102 | -from oletools import crypto | |
| 100 | +from oletools.thirdparty.tablestream import tablestream | |
| 101 | +from oletools import crypto, ftguess, olevba, mraptor | |
| 102 | +from oletools.common.log_helper import log_helper | |
| 103 | 103 | |
| 104 | +# === LOGGING ================================================================= | |
| 104 | 105 | |
| 106 | +log = log_helper.get_or_create_silent_logger('oleid') | |
| 107 | + | |
| 108 | +# === CONSTANTS =============================================================== | |
| 109 | + | |
| 110 | +class RISK(object): | |
| 111 | + """ | |
| 112 | + Constants for risk levels | |
| 113 | + """ | |
| 114 | + HIGH = 'HIGH' | |
| 115 | + MEDIUM = 'Medium' | |
| 116 | + LOW = 'low' | |
| 117 | + NONE = 'none' | |
| 118 | + INFO = 'info' | |
| 119 | + UNKNOWN = 'Unknown' | |
| 120 | + ERROR = 'Error' # if a check triggered an unexpected error | |
| 121 | + | |
| 122 | +risk_color = { | |
| 123 | + RISK.HIGH: 'red', | |
| 124 | + RISK.MEDIUM: 'yellow', | |
| 125 | + RISK.LOW: 'white', | |
| 126 | + RISK.NONE: 'green', | |
| 127 | + RISK.INFO: 'cyan', | |
| 128 | + RISK.UNKNOWN: None, | |
| 129 | + RISK.ERROR: None | |
| 130 | +} | |
| 105 | 131 | |
| 106 | 132 | #=== FUNCTIONS =============================================================== |
| 107 | 133 | |
| ... | ... | @@ -166,7 +192,7 @@ class Indicator(object): |
| 166 | 192 | """ |
| 167 | 193 | |
| 168 | 194 | def __init__(self, _id, value=None, _type=bool, name=None, |
| 169 | - description=None): | |
| 195 | + description=None, risk=RISK.UNKNOWN, hide_if_false=True): | |
| 170 | 196 | self.id = _id |
| 171 | 197 | self.value = value |
| 172 | 198 | self.type = _type |
| ... | ... | @@ -174,17 +200,19 @@ class Indicator(object): |
| 174 | 200 | if name == None: |
| 175 | 201 | self.name = _id |
| 176 | 202 | self.description = description |
| 203 | + self.risk = risk | |
| 204 | + self.hide_if_false = hide_if_false | |
| 177 | 205 | |
| 178 | 206 | |
| 179 | 207 | class OleID(object): |
| 180 | 208 | """ |
| 181 | - Summary of information about an OLE file | |
| 209 | + Summary of information about an OLE file (and a few other MS Office formats) | |
| 182 | 210 | |
| 183 | 211 | Call :py:meth:`OleID.check` to gather all info on a given file or run one |
| 184 | 212 | of the `check_` functions to just get a specific piece of info. |
| 185 | 213 | """ |
| 186 | 214 | |
| 187 | - def __init__(self, input_file): | |
| 215 | + def __init__(self, filename=None, data=None): | |
| 188 | 216 | """ |
| 189 | 217 | Create an OleID object |
| 190 | 218 | |
| ... | ... | @@ -199,11 +227,17 @@ class OleID(object): |
| 199 | 227 | If filename is given, only :py:meth:`OleID.check` opens the file. Other |
| 200 | 228 | functions will return None |
| 201 | 229 | """ |
| 202 | - if isinstance(input_file, olefile.OleFileIO): | |
| 203 | - self.ole = input_file | |
| 230 | + if filename is None and data is None: | |
| 231 | + raise ValueError('OleID requires either a file path or file data, or both') | |
| 232 | + if data is None: | |
| 233 | + with open(filename, 'rb') as f: | |
| 234 | + self.data = f.read() | |
| 235 | + self.data_bytesio = io.BytesIO(self.data) | |
| 236 | + if isinstance(filename, olefile.OleFileIO): | |
| 237 | + self.ole = filename | |
| 204 | 238 | self.filename = None |
| 205 | 239 | else: |
| 206 | - self.filename = input_file | |
| 240 | + self.filename = filename | |
| 207 | 241 | self.ole = None |
| 208 | 242 | self.indicators = [] |
| 209 | 243 | self.suminfo_data = None |
| ... | ... | @@ -214,24 +248,37 @@ class OleID(object): |
| 214 | 248 | |
| 215 | 249 | :returns: list of all :py:class:`Indicator`s created |
| 216 | 250 | """ |
| 251 | + self.ftg = ftguess.FileTypeGuesser(filepath=self.filename, data=self.data) | |
| 252 | + ftype = self.ftg.ftype | |
| 253 | + ft = Indicator('ftype', value=ftype.longname, _type=str, name='File format', risk=RISK.INFO) | |
| 254 | + self.indicators.append(ft) | |
| 255 | + ct = Indicator('container', value=ftype.container, _type=str, name='Container format', risk=RISK.INFO) | |
| 256 | + self.indicators.append(ct) | |
| 257 | + | |
| 217 | 258 | # check if it is actually an OLE file: |
| 218 | - oleformat = Indicator('ole_format', True, name='OLE format') | |
| 219 | - self.indicators.append(oleformat) | |
| 220 | - if self.ole: | |
| 221 | - oleformat.value = True | |
| 222 | - elif not olefile.isOleFile(self.filename): | |
| 223 | - oleformat.value = False | |
| 224 | - return self.indicators | |
| 225 | - else: | |
| 226 | - # parse file: | |
| 227 | - self.ole = olefile.OleFileIO(self.filename) | |
| 259 | + if self.ftg.container == ftguess.CONTAINER.OLE: | |
| 260 | + # reuse olefile already opened by ftguess | |
| 261 | + self.ole = self.ftg.olefile | |
| 262 | + # oleformat = Indicator('ole_format', True, name='OLE format') | |
| 263 | + # self.indicators.append(oleformat) | |
| 264 | + # if self.ole: | |
| 265 | + # oleformat.value = True | |
| 266 | + # elif not olefile.isOleFile(self.filename): | |
| 267 | + # oleformat.value = False | |
| 268 | + # return self.indicators | |
| 269 | + # else: | |
| 270 | + # # parse file: | |
| 271 | + # self.ole = olefile.OleFileIO(self.filename) | |
| 272 | + | |
| 228 | 273 | # checks: |
| 274 | + # TODO: add try/except around each check | |
| 229 | 275 | self.check_properties() |
| 230 | 276 | self.check_encrypted() |
| 231 | - self.check_word() | |
| 232 | - self.check_excel() | |
| 233 | - self.check_powerpoint() | |
| 234 | - self.check_visio() | |
| 277 | + # self.check_word() | |
| 278 | + # self.check_excel() | |
| 279 | + # self.check_powerpoint() | |
| 280 | + # self.check_visio() | |
| 281 | + self.check_macros() | |
| 235 | 282 | self.check_object_pool() |
| 236 | 283 | self.check_flash() |
| 237 | 284 | self.ole.close() |
| ... | ... | @@ -244,6 +291,7 @@ class OleID(object): |
| 244 | 291 | :returns: 2 :py:class:`Indicator`s (for presence of summary info and |
| 245 | 292 | application name) or None if file was not opened |
| 246 | 293 | """ |
| 294 | + # TODO: use get_metadata | |
| 247 | 295 | suminfo = Indicator('has_suminfo', False, |
| 248 | 296 | name='Has SummaryInformation stream') |
| 249 | 297 | self.indicators.append(suminfo) |
| ... | ... | @@ -280,11 +328,17 @@ class OleID(object): |
| 280 | 328 | opened |
| 281 | 329 | """ |
| 282 | 330 | # we keep the pointer to the indicator, can be modified by other checks: |
| 283 | - encrypted = Indicator('encrypted', False, name='Encrypted') | |
| 331 | + encrypted = Indicator('encrypted', False, name='Encrypted', | |
| 332 | + risk=RISK.NONE, | |
| 333 | + description='The file is not encrypted', | |
| 334 | + hide_if_false=False) | |
| 284 | 335 | self.indicators.append(encrypted) |
| 285 | 336 | if not self.ole: |
| 286 | 337 | return None |
| 287 | - encrypted.value = crypto.is_encrypted(self.ole) | |
| 338 | + if crypto.is_encrypted(self.ole): | |
| 339 | + encrypted.value = True | |
| 340 | + encrypted.risk = RISK.LOW | |
| 341 | + encrypted.description = 'The file is encrypted. It may be decrypted with msoffcrypto-tool' | |
| 288 | 342 | return encrypted |
| 289 | 343 | |
| 290 | 344 | def check_word(self): |
| ... | ... | @@ -302,7 +356,7 @@ class OleID(object): |
| 302 | 356 | description='Contains a WordDocument stream, very likely to be a ' |
| 303 | 357 | 'Microsoft Word Document.') |
| 304 | 358 | self.indicators.append(word) |
| 305 | - macros = Indicator('vba_macros', False, name='VBA Macros') | |
| 359 | + macros = Indicator('vba_macros', False, name='VBA Macros', risk=RISK.MEDIUM) | |
| 306 | 360 | self.indicators.append(macros) |
| 307 | 361 | if not self.ole: |
| 308 | 362 | return None, None |
| ... | ... | @@ -400,6 +454,36 @@ class OleID(object): |
| 400 | 454 | objpool.value = True |
| 401 | 455 | return objpool |
| 402 | 456 | |
| 457 | + def check_macros(self): | |
| 458 | + """ | |
| 459 | + Check whether this file contains macros (VBA and XLM/Excel 4). | |
| 460 | + | |
| 461 | + :returns: :py:class:`Indicator` | |
| 462 | + """ | |
| 463 | + vba_indicator = Indicator(_id='vba', value='No', _type=str, name='VBA Macros', | |
| 464 | + description='This file does not contain VBA macros.', | |
| 465 | + risk=RISK.NONE) | |
| 466 | + try: | |
| 467 | + vba_parser = olevba.VBA_Parser(filename=self.filename, data=self.data) | |
| 468 | + if vba_parser.detect_vba_macros(): | |
| 469 | + vba_indicator.value = 'Yes' | |
| 470 | + vba_indicator.risk = RISK.MEDIUM | |
| 471 | + vba_indicator.description = 'This file contains VBA macros. No suspicious keyword was found. Use olevba and mraptor for more info.' | |
| 472 | + # check code with mraptor | |
| 473 | + vba_code = vba_parser.get_vba_code_all_modules() | |
| 474 | + m = mraptor.MacroRaptor(vba_code) | |
| 475 | + m.scan() | |
| 476 | + if m.suspicious: | |
| 477 | + vba_indicator.value = 'Yes, suspicious' | |
| 478 | + vba_indicator.risk = RISK.HIGH | |
| 479 | + vba_indicator.description = 'This file contains VBA macros. Suspicious keywords were found. Use olevba and mraptor for more info.' | |
| 480 | + except Exception as e: | |
| 481 | + vba_indicator.risk = RISK.ERROR | |
| 482 | + vba_indicator.value = 'Error' | |
| 483 | + vba_indicator.description = 'Error while checking VBA macros: %s' % str(e) | |
| 484 | + self.indicators.append(vba_indicator) | |
| 485 | + return vba_indicator | |
| 486 | + | |
| 403 | 487 | def check_flash(self): |
| 404 | 488 | """ |
| 405 | 489 | Check whether this file contains flash objects |
| ... | ... | @@ -407,11 +491,13 @@ class OleID(object): |
| 407 | 491 | :returns: :py:class:`Indicator` for count of flash objects or None if |
| 408 | 492 | file was not opened |
| 409 | 493 | """ |
| 494 | + # TODO: add support for RTF and OpenXML formats | |
| 410 | 495 | flash = Indicator( |
| 411 | 496 | 'flash', 0, _type=int, name='Flash objects', |
| 412 | 497 | description='Number of embedded Flash objects (SWF files) detected ' |
| 413 | 498 | 'in OLE streams. Not 100% accurate, there may be false ' |
| 414 | - 'positives.') | |
| 499 | + 'positives.', | |
| 500 | + risk=RISK.NONE) | |
| 415 | 501 | self.indicators.append(flash) |
| 416 | 502 | if not self.ole: |
| 417 | 503 | return None |
| ... | ... | @@ -421,6 +507,8 @@ class OleID(object): |
| 421 | 507 | # just add to the count of Flash objects: |
| 422 | 508 | flash.value += len(found) |
| 423 | 509 | #print stream, found |
| 510 | + if flash.value > 0: | |
| 511 | + flash.risk = RISK.MEDIUM | |
| 424 | 512 | return flash |
| 425 | 513 | |
| 426 | 514 | |
| ... | ... | @@ -449,6 +537,8 @@ def main(): |
| 449 | 537 | parser.print_help() |
| 450 | 538 | return |
| 451 | 539 | |
| 540 | + log_helper.enable_logging() | |
| 541 | + | |
| 452 | 542 | for filename in args.input: |
| 453 | 543 | print('Filename:', filename) |
| 454 | 544 | oleid = OleID(filename) |
| ... | ... | @@ -456,17 +546,16 @@ def main(): |
| 456 | 546 | |
| 457 | 547 | #TODO: add description |
| 458 | 548 | #TODO: highlight suspicious indicators |
| 459 | - table = prettytable.PrettyTable(['Indicator', 'Value']) | |
| 460 | - table.align = 'l' | |
| 461 | - table.max_width = 39 | |
| 462 | - table.border = False | |
| 463 | - | |
| 549 | + table = tablestream.TableStream([20, 20, 10, 26], | |
| 550 | + header_row=['Indicator', 'Value', 'Risk', 'Description'], | |
| 551 | + style=tablestream.TableStyleSlimSep) | |
| 464 | 552 | for indicator in indicators: |
| 465 | - #print '%s: %s' % (indicator.name, indicator.value) | |
| 466 | - table.add_row((indicator.name, indicator.value)) | |
| 467 | - | |
| 468 | - print(table) | |
| 469 | - print('') | |
| 553 | + if not (indicator.hide_if_false and not indicator.value): | |
| 554 | + #print '%s: %s' % (indicator.name, indicator.value) | |
| 555 | + color = risk_color.get(indicator.risk, None) | |
| 556 | + table.write_row((indicator.name, indicator.value, indicator.risk, indicator.description), | |
| 557 | + colors=(color, color, color, None)) | |
| 558 | + table.close() | |
| 470 | 559 | |
| 471 | 560 | if __name__ == '__main__': |
| 472 | 561 | main() | ... | ... |