Commit bdad8c146df8e66815562272b6a00f980a359e0e (1 parent: fad632c5)
improved support for MHTML files with modified header: fixed issue #11
Showing 1 changed file with 217 additions and 168 deletions
oletools/olevba.py
| ... | ... | @@ -23,7 +23,7 @@ olevba is based on source code from officeparser by John William Davison |
| 23 | 23 | https://github.com/unixfreak0037/officeparser |
| 24 | 24 | """ |
| 25 | 25 | |
| 26 | -#=== LICENSE ================================================================== | |
| 26 | +# === LICENSE ================================================================== | |
| 27 | 27 | |
| 28 | 28 | # olevba is copyright (c) 2014-2015 Philippe Lagadec (http://www.decalage.info) |
| 29 | 29 | # All rights reserved. |
| ... | ... | @@ -130,8 +130,10 @@ https://github.com/unixfreak0037/officeparser |
| 130 | 130 | # virtualisation detection |
| 131 | 131 | # 2015-05-06 v0.27 PL: - added support for MHTML files with VBA macros |
| 132 | 132 | # (issue #10 reported by Greg from SpamStopsHere) |
| 133 | +# 2015-05-24 v0.28 PL: - improved support for MHTML files with modified header | |
| 134 | +# (issue #11 reported by Thomas Chopitea) | |
| 133 | 135 | |
| 134 | -__version__ = '0.27' | |
| 136 | +__version__ = '0.28' | |
| 135 | 137 | |
| 136 | 138 | #------------------------------------------------------------------------------ |
| 137 | 139 | # TODO: |
| ... | ... | @@ -178,7 +180,7 @@ import binascii |
| 178 | 180 | import base64 |
| 179 | 181 | import traceback |
| 180 | 182 | import zlib |
| 181 | -import email # for MHTML parsing | |
| 183 | +import email # for MHTML parsing | |
| 182 | 184 | |
| 183 | 185 | # import lxml or ElementTree for XML parsing: |
| 184 | 186 | try: |
| ... | ... | @@ -193,9 +195,9 @@ except ImportError: |
| 193 | 195 | # Python <2.5: standalone ElementTree install |
| 194 | 196 | import elementtree.cElementTree as ET |
| 195 | 197 | except ImportError: |
| 196 | - raise ImportError, "lxml or ElementTree are not installed, "\ | |
| 197 | - +"see http://codespeak.net/lxml "\ | |
| 198 | - +"or http://effbot.org/zone/element-index.htm" | |
| 198 | + raise ImportError, "lxml or ElementTree are not installed, " \ | |
| 199 | + + "see http://codespeak.net/lxml " \ | |
| 200 | + + "or http://effbot.org/zone/element-index.htm" | |
| 199 | 201 | |
| 200 | 202 | import thirdparty.olefile as olefile |
| 201 | 203 | from thirdparty.prettytable import prettytable |
| ... | ... | @@ -203,12 +205,19 @@ from thirdparty.xglob import xglob |
| 203 | 205 | |
| 204 | 206 | #--- CONSTANTS ---------------------------------------------------------------- |
| 205 | 207 | |
| 208 | +# URL and message to report issues: | |
| 209 | +URL_OLEVBA_ISSUES = 'https://bitbucket.org/decalage/oletools/issues' | |
| 210 | +MSG_OLEVBA_ISSUES = 'Please report this issue on %s' % URL_OLEVBA_ISSUES | |
| 211 | + | |
| 206 | 212 | # Container types: |
| 207 | -TYPE_OLE = 'OLE' | |
| 213 | +TYPE_OLE = 'OLE' | |
| 208 | 214 | TYPE_OpenXML = 'OpenXML' |
| 209 | 215 | TYPE_Word2003_XML = 'Word2003_XML' |
| 210 | 216 | TYPE_MHTML = 'MHTML' |
| 211 | 217 | |
| 218 | +# MSO files ActiveMime header magic | |
| 219 | +MSO_ACTIVEMIME_HEADER = 'ActiveMime' | |
| 220 | + | |
| 212 | 221 | MODULE_EXTENSION = "bas" |
| 213 | 222 | CLASS_EXTENSION = "cls" |
| 214 | 223 | FORM_EXTENSION = "frm" |
| ... | ... | @@ -249,28 +258,28 @@ SUSPICIOUS_KEYWORDS = { |
| 249 | 258 | 'May open a file': |
| 250 | 259 | ('Open',), |
| 251 | 260 | 'May write to a file (if combined with Open)': |
| 252 | - #TODO: regex to find Open+Write on same line | |
| 261 | + #TODO: regex to find Open+Write on same line | |
| 253 | 262 | ('Write', 'Put', 'Output', 'Print #'), |
| 254 | 263 | 'May read or write a binary file (if combined with Open)': |
| 255 | - #TODO: regex to find Open+Binary on same line | |
| 264 | + #TODO: regex to find Open+Binary on same line | |
| 256 | 265 | ('Binary',), |
| 257 | 266 | 'May copy a file': |
| 258 | 267 | ('FileCopy', 'CopyFile'), |
| 259 | - #FileCopy: http://msdn.microsoft.com/en-us/library/office/gg264390%28v=office.15%29.aspx | |
| 260 | - #CopyFile: http://msdn.microsoft.com/en-us/library/office/gg264089%28v=office.15%29.aspx | |
| 268 | + #FileCopy: http://msdn.microsoft.com/en-us/library/office/gg264390%28v=office.15%29.aspx | |
| 269 | + #CopyFile: http://msdn.microsoft.com/en-us/library/office/gg264089%28v=office.15%29.aspx | |
| 261 | 270 | 'May delete a file': |
| 262 | 271 | ('Kill',), |
| 263 | 272 | 'May create a text file': |
| 264 | - ('CreateTextFile','ADODB.Stream', 'WriteText', 'SaveToFile'), | |
| 265 | - #CreateTextFile: http://msdn.microsoft.com/en-us/library/office/gg264617%28v=office.15%29.aspx | |
| 266 | - #ADODB.Stream sample: http://pastebin.com/Z4TMyuq6 | |
| 273 | + ('CreateTextFile', 'ADODB.Stream', 'WriteText', 'SaveToFile'), | |
| 274 | + #CreateTextFile: http://msdn.microsoft.com/en-us/library/office/gg264617%28v=office.15%29.aspx | |
| 275 | + #ADODB.Stream sample: http://pastebin.com/Z4TMyuq6 | |
| 267 | 276 | 'May run an executable file or a system command': |
| 268 | 277 | ('Shell', 'vbNormal', 'vbNormalFocus', 'vbHide', 'vbMinimizedFocus', 'vbMaximizedFocus', 'vbNormalNoFocus', |
| 269 | 278 | 'vbMinimizedNoFocus', 'WScript.Shell', 'Run'), |
| 270 | - #Shell: http://msdn.microsoft.com/en-us/library/office/gg278437%28v=office.15%29.aspx | |
| 271 | - #WScript.Shell+Run sample: http://pastebin.com/Z4TMyuq6 | |
| 279 | + #Shell: http://msdn.microsoft.com/en-us/library/office/gg278437%28v=office.15%29.aspx | |
| 280 | + #WScript.Shell+Run sample: http://pastebin.com/Z4TMyuq6 | |
| 272 | 281 | 'May run PowerShell commands': |
| 273 | - #sample: https://malwr.com/analysis/M2NjZWNmMjA0YjVjNGVhYmJlZmFhNWY4NmQxZDllZTY/ | |
| 282 | + #sample: https://malwr.com/analysis/M2NjZWNmMjA0YjVjNGVhYmJlZmFhNWY4NmQxZDllZTY/ | |
| 274 | 283 | ('PowerShell', ), |
| 275 | 284 | 'May hide the application': |
| 276 | 285 | ('Application.Visible', 'ShowWindow', 'SW_HIDE'), |
| ... | ... | @@ -279,7 +288,7 @@ SUSPICIOUS_KEYWORDS = { |
| 279 | 288 | 'May save the current workbook': |
| 280 | 289 | ('ActiveWorkbook.SaveAs',), |
| 281 | 290 | 'May change which directory contains files to open at startup': |
| 282 | - #TODO: confirm the actual effect | |
| 291 | + #TODO: confirm the actual effect | |
| 283 | 292 | ('Application.AltStartupPath',), |
| 284 | 293 | 'May create an OLE object': |
| 285 | 294 | ('CreateObject',), |
| ... | ... | @@ -288,58 +297,58 @@ SUSPICIOUS_KEYWORDS = { |
| 288 | 297 | 'May enumerate application windows (if combined with Shell.Application object)': |
| 289 | 298 | ('Windows', 'FindWindow'), |
| 290 | 299 | 'May run code from a DLL': |
| 291 | - #TODO: regex to find declare+lib on same line | |
| 300 | + #TODO: regex to find declare+lib on same line | |
| 292 | 301 | ('Lib',), |
| 293 | 302 | 'May download files from the Internet': |
| 294 | - #TODO: regex to find urlmon+URLDownloadToFileA on same line | |
| 303 | + #TODO: regex to find urlmon+URLDownloadToFileA on same line | |
| 295 | 304 | ('URLDownloadToFileA', 'Msxml2.XMLHTTP', 'Microsoft.XMLHTTP'), |
| 296 | 305 | 'May download files from the Internet using PowerShell': |
| 297 | - #sample: https://malwr.com/analysis/M2NjZWNmMjA0YjVjNGVhYmJlZmFhNWY4NmQxZDllZTY/ | |
| 306 | + #sample: https://malwr.com/analysis/M2NjZWNmMjA0YjVjNGVhYmJlZmFhNWY4NmQxZDllZTY/ | |
| 298 | 307 | ('New-Object System.Net.WebClient', 'DownloadFile'), |
| 299 | 308 | 'May control another application by simulating user keystrokes': |
| 300 | 309 | ('SendKeys', 'AppActivate'), |
| 301 | - #SendKeys: http://msdn.microsoft.com/en-us/library/office/gg278655%28v=office.15%29.aspx | |
| 310 | + #SendKeys: http://msdn.microsoft.com/en-us/library/office/gg278655%28v=office.15%29.aspx | |
| 302 | 311 | 'May attempt to obfuscate malicious function calls': |
| 303 | 312 | ('CallByName',), |
| 304 | - #CallByName: http://msdn.microsoft.com/en-us/library/office/gg278760%28v=office.15%29.aspx | |
| 313 | + #CallByName: http://msdn.microsoft.com/en-us/library/office/gg278760%28v=office.15%29.aspx | |
| 305 | 314 | 'May attempt to obfuscate specific strings': |
| 306 | - #TODO: regex to find several Chr*, not just one | |
| 315 | + #TODO: regex to find several Chr*, not just one | |
| 307 | 316 | ('Chr', 'ChrB', 'ChrW', 'StrReverse', 'Xor'), |
| 308 | - #Chr: http://msdn.microsoft.com/en-us/library/office/gg264465%28v=office.15%29.aspx | |
| 317 | + #Chr: http://msdn.microsoft.com/en-us/library/office/gg264465%28v=office.15%29.aspx | |
| 309 | 318 | 'May read or write registry keys': |
| 310 | - #sample: https://malwr.com/analysis/M2NjZWNmMjA0YjVjNGVhYmJlZmFhNWY4NmQxZDllZTY/ | |
| 319 | + #sample: https://malwr.com/analysis/M2NjZWNmMjA0YjVjNGVhYmJlZmFhNWY4NmQxZDllZTY/ | |
| 311 | 320 | ('RegOpenKeyExA', 'RegOpenKeyEx', 'RegCloseKey'), |
| 312 | 321 | 'May read registry keys': |
| 313 | - #sample: https://malwr.com/analysis/M2NjZWNmMjA0YjVjNGVhYmJlZmFhNWY4NmQxZDllZTY/ | |
| 322 | + #sample: https://malwr.com/analysis/M2NjZWNmMjA0YjVjNGVhYmJlZmFhNWY4NmQxZDllZTY/ | |
| 314 | 323 | ('RegQueryValueExA', 'RegQueryValueEx', |
| 315 | 324 | 'RegRead', #with Wscript.Shell |
| 316 | - ), | |
| 325 | + ), | |
| 317 | 326 | 'May detect virtualization': |
| 318 | - # sample: https://malwr.com/analysis/M2NjZWNmMjA0YjVjNGVhYmJlZmFhNWY4NmQxZDllZTY/ | |
| 327 | + # sample: https://malwr.com/analysis/M2NjZWNmMjA0YjVjNGVhYmJlZmFhNWY4NmQxZDllZTY/ | |
| 319 | 328 | (r'SYSTEM\ControlSet001\Services\Disk\Enum', 'VIRTUAL', 'VMWARE', 'VBOX'), |
| 320 | 329 | 'May detect Anubis Sandbox': |
| 321 | - # sample: https://malwr.com/analysis/M2NjZWNmMjA0YjVjNGVhYmJlZmFhNWY4NmQxZDllZTY/ | |
| 322 | - # NOTES: this sample also checks App.EXEName but that seems to be a bug, it works in VB6 but not in VBA | |
| 323 | - # ref: http://www.syssec-project.eu/m/page-media/3/disarm-raid11.pdf | |
| 324 | - ('GetVolumeInformationA', 'GetVolumeInformation', # with kernel32.dll | |
| 330 | + # sample: https://malwr.com/analysis/M2NjZWNmMjA0YjVjNGVhYmJlZmFhNWY4NmQxZDllZTY/ | |
| 331 | + # NOTES: this sample also checks App.EXEName but that seems to be a bug, it works in VB6 but not in VBA | |
| 332 | + # ref: http://www.syssec-project.eu/m/page-media/3/disarm-raid11.pdf | |
| 333 | + ('GetVolumeInformationA', 'GetVolumeInformation', # with kernel32.dll | |
| 325 | 334 | '1824245000', r'HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Windows NT\CurrentVersion\ProductId', |
| 326 | 335 | '76487-337-8429955-22614', 'andy', 'sample', r'C:\exec\exec.exe', 'popupkiller' |
| 327 | 336 | ), |
| 328 | 337 | 'May detect Sandboxie': |
| 329 | - # sample: https://malwr.com/analysis/M2NjZWNmMjA0YjVjNGVhYmJlZmFhNWY4NmQxZDllZTY/ | |
| 330 | - # ref: http://www.cplusplus.com/forum/windows/96874/ | |
| 338 | + # sample: https://malwr.com/analysis/M2NjZWNmMjA0YjVjNGVhYmJlZmFhNWY4NmQxZDllZTY/ | |
| 339 | + # ref: http://www.cplusplus.com/forum/windows/96874/ | |
| 331 | 340 | ('SbieDll.dll', 'SandboxieControlWndClass'), |
| 332 | 341 | 'May detect Sunbelt Sandbox': |
| 333 | - # ref: http://www.cplusplus.com/forum/windows/96874/ | |
| 342 | + # ref: http://www.cplusplus.com/forum/windows/96874/ | |
| 334 | 343 | (r'C:\file.exe',), |
| 335 | 344 | 'May detect Norman Sandbox': |
| 336 | - # ref: http://www.cplusplus.com/forum/windows/96874/ | |
| 345 | + # ref: http://www.cplusplus.com/forum/windows/96874/ | |
| 337 | 346 | ('currentuser',), |
| 338 | 347 | 'May detect CW Sandbox': |
| 339 | - # ref: http://www.cplusplus.com/forum/windows/96874/ | |
| 348 | + # ref: http://www.cplusplus.com/forum/windows/96874/ | |
| 340 | 349 | ('Schmidti',), |
| 341 | 350 | 'May detect WinJail Sandbox': |
| 342 | - # ref: http://www.cplusplus.com/forum/windows/96874/ | |
| 351 | + # ref: http://www.cplusplus.com/forum/windows/96874/ | |
| 343 | 352 | ('Afx:400000:0',), |
| 344 | 353 | } |
| 345 | 354 | |
| ... | ... | @@ -355,12 +364,12 @@ DNS_NAME = r'(?:[a-zA-Z0-9\-\.]+\.' + TLD + ')' |
| 355 | 364 | #TODO: IPv6 - see https://www.debuggex.com/ |
| 356 | 365 | # A literal numeric IPv6 address may be given, but must be enclosed in [ ] e.g. [db8:0cec::99:123a] |
| 357 | 366 | NUMBER_0_255 = r'(?:25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9][0-9]|[0-9])' |
| 358 | -IPv4 = r'(?:'+NUMBER_0_255+r'\.){3}'+NUMBER_0_255 | |
| 367 | +IPv4 = r'(?:' + NUMBER_0_255 + r'\.){3}' + NUMBER_0_255 | |
| 359 | 368 | # IPv4 must come before the DNS name because it is more specific |
| 360 | 369 | SERVER = r'(?:' + IPv4 + '|' + DNS_NAME + ')' |
| 361 | 370 | PORT = r'(?:\:[0-9]{1,5})?' |
| 362 | 371 | SERVER_PORT = SERVER + PORT |
| 363 | -URL_PATH = r'(?:/[a-zA-Z0-9\-\._\?\,\'/\\\+&%\$#\=~]*)?' # [^\.\,\)\(\s"] | |
| 372 | +URL_PATH = r'(?:/[a-zA-Z0-9\-\._\?\,\'/\\\+&%\$#\=~]*)?' # [^\.\,\)\(\s"] | |
| 364 | 373 | URL_RE = SCHEME + r'\://' + SERVER_PORT + URL_PATH |
| 365 | 374 | re_url = re.compile(URL_RE) |
| 366 | 375 | |
| ... | ... | @@ -370,14 +379,15 @@ re_url = re.compile(URL_RE) |
| 370 | 379 | RE_PATTERNS = ( |
| 371 | 380 | ('URL', re.compile(URL_RE)), |
| 372 | 381 | ('IPv4 address', re.compile(IPv4)), |
| 373 | - ('E-mail address', re.compile(r'(?i)\b[A-Z0-9._%+-]+@'+SERVER+'\b')), | |
| 382 | + ('E-mail address', re.compile(r'(?i)\b[A-Z0-9._%+-]+@' + SERVER + '\b')), | |
| 374 | 383 | # ('Domain name', re.compile(r'(?=^.{1,254}$)(^(?:(?!\d+\.|-)[a-zA-Z0-9_\-]{1,63}(?<!-)\.?)+(?:[a-zA-Z]{2,})$)')), |
| 375 | 384 | # Executable file name with known extensions (except .com which is present in many URLs, and .application): |
| 376 | - ("Executable file name", re.compile(r"(?i)\b\w+\.(EXE|PIF|GADGET|MSI|MSP|MSC|VBS|VBE|VB|JSE|JS|WSF|WSC|WSH|WS|BAT|CMD|DLL|SCR|HTA|CPL|CLASS|JAR|PS1XML|PS1|PS2XML|PS2|PSC1|PSC2|SCF|LNK|INF|REG)\b")), | |
| 385 | + ("Executable file name", re.compile( | |
| 386 | + r"(?i)\b\w+\.(EXE|PIF|GADGET|MSI|MSP|MSC|VBS|VBE|VB|JSE|JS|WSF|WSC|WSH|WS|BAT|CMD|DLL|SCR|HTA|CPL|CLASS|JAR|PS1XML|PS1|PS2XML|PS2|PSC1|PSC2|SCF|LNK|INF|REG)\b")), | |
| 377 | 387 | # Sources: http://www.howtogeek.com/137270/50-file-extensions-that-are-potentially-dangerous-on-windows/ |
| 378 | 388 | #TODO: https://support.office.com/en-us/article/Blocked-attachments-in-Outlook-3811cddc-17c3-4279-a30c-060ba0207372#__attachment_file_types |
| 379 | 389 | #('Hex string', re.compile(r'(?:[0-9A-Fa-f]{2}){4,}')), |
| 380 | - ) | |
| 390 | +) | |
| 381 | 391 | |
| 382 | 392 | # regex to detect strings encoded in hexadecimal |
| 383 | 393 | re_hex_string = re.compile(r'(?:[0-9A-Fa-f]{2}){4,}') |
| ... | ... | @@ -385,7 +395,8 @@ re_hex_string = re.compile(r'(?:[0-9A-Fa-f]{2}){4,}') |
| 385 | 395 | # regex to detect strings encoded in base64 |
| 386 | 396 | #re_base64_string = re.compile(r'"(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)?"') |
| 387 | 397 | # better version from balbuzard, less false positives: |
| 388 | -re_base64_string = re.compile(r'"(?:[A-Za-z0-9+/]{4}){1,}(?:[A-Za-z0-9+/]{2}[AEIMQUYcgkosw048]=|[A-Za-z0-9+/][AQgw]==)?"') | |
| 398 | +re_base64_string = re.compile( | |
| 399 | + r'"(?:[A-Za-z0-9+/]{4}){1,}(?:[A-Za-z0-9+/]{2}[AEIMQUYcgkosw048]=|[A-Za-z0-9+/][AQgw]==)?"') | |
| 389 | 400 | # white list of common strings matching the base64 regex, but which are not base64 strings (all lowercase): |
| 390 | 401 | BASE64_WHITELIST = set(['thisdocument', 'thisworkbook', 'test', 'temp', 'http', 'open', 'exit']) |
| 391 | 402 | |
| ... | ... | @@ -414,7 +425,7 @@ def copytoken_help(decompressed_current, decompressed_chunk_start): |
| 414 | 425 | return length_mask, offset_mask, bit_count, maximum_length |
| 415 | 426 | |
| 416 | 427 | |
| 417 | -def decompress_stream (compressed_container): | |
| 428 | +def decompress_stream(compressed_container): | |
| 418 | 429 | """ |
| 419 | 430 | Decompress a stream according to MS-OVBA section 2.4.1 |
| 420 | 431 | |
| ... | ... | @@ -456,7 +467,8 @@ def decompress_stream (compressed_container): |
| 456 | 467 | # 2.4.1.1.5 |
| 457 | 468 | compressed_chunk_start = compressed_current |
| 458 | 469 | # chunk header = first 16 bits |
| 459 | - compressed_chunk_header = struct.unpack("<H", compressed_container[compressed_chunk_start:compressed_chunk_start + 2])[0] | |
| 470 | + compressed_chunk_header = \ | |
| 471 | + struct.unpack("<H", compressed_container[compressed_chunk_start:compressed_chunk_start + 2])[0] | |
| 460 | 472 | # chunk size = 12 first bits of header + 3 |
| 461 | 473 | chunk_size = (compressed_chunk_header & 0x0FFF) + 3 |
| 462 | 474 | # chunk signature = 3 next bits - should always be 0b011 |
| ... | ... | @@ -510,13 +522,14 @@ def decompress_stream (compressed_container): |
| 510 | 522 | # MS-OVBA 2.4.1.3.17 Extract FlagBit |
| 511 | 523 | flag_bit = (flag_byte >> bit_index) & 1 |
| 512 | 524 | #logging.debug('bit_index=%d: flag_bit=%d' % (bit_index, flag_bit)) |
| 513 | - if flag_bit == 0: # LiteralToken | |
| 525 | + if flag_bit == 0: # LiteralToken | |
| 514 | 526 | # copy one byte directly to output |
| 515 | 527 | decompressed_container += compressed_container[compressed_current] |
| 516 | 528 | compressed_current += 1 |
| 517 | - else: # CopyToken | |
| 529 | + else: # CopyToken | |
| 518 | 530 | # MS-OVBA 2.4.1.3.19.2 Unpack CopyToken |
| 519 | - copy_token = struct.unpack("<H", compressed_container[compressed_current:compressed_current + 2])[0] | |
| 531 | + copy_token = \ | |
| 532 | + struct.unpack("<H", compressed_container[compressed_current:compressed_current + 2])[0] | |
| 520 | 533 | #TODO: check this |
| 521 | 534 | length_mask, offset_mask, bit_count, maximum_length = copytoken_help( |
| 522 | 535 | len(decompressed_container), decompressed_chunk_start) |
| ... | ... | @@ -532,7 +545,7 @@ def decompress_stream (compressed_container): |
| 532 | 545 | return decompressed_container |
| 533 | 546 | |
| 534 | 547 | |
| 535 | -def _extract_vba (ole, vba_root, project_path, dir_path): | |
| 548 | +def _extract_vba(ole, vba_root, project_path, dir_path): | |
| 536 | 549 | """ |
| 537 | 550 | Extract VBA macros from an OleFileIO object. |
| 538 | 551 | Internal function, do not call directly. |
| ... | ... | @@ -649,7 +662,8 @@ def _extract_vba (ole, vba_root, project_path, dir_path): |
| 649 | 662 | check_value('PROJECTDOCSTRING_Id', 0x0005, PROJECTDOCSTRING_Id) |
| 650 | 663 | PROJECTDOCSTRING_SizeOfDocString = struct.unpack("<L", dir_stream.read(4))[0] |
| 651 | 664 | if PROJECTNAME_SizeOfProjectName > 2000: |
| 652 | - logging.error("PROJECTDOCSTRING_SizeOfDocString value not in range: {0}".format(PROJECTDOCSTRING_SizeOfDocString)) | |
| 665 | + logging.error( | |
| 666 | + "PROJECTDOCSTRING_SizeOfDocString value not in range: {0}".format(PROJECTDOCSTRING_SizeOfDocString)) | |
| 653 | 667 | PROJECTDOCSTRING_DocString = dir_stream.read(PROJECTDOCSTRING_SizeOfDocString) |
| 654 | 668 | PROJECTDOCSTRING_Reserved = struct.unpack("<H", dir_stream.read(2))[0] |
| 655 | 669 | check_value('PROJECTDOCSTRING_Reserved', 0x0040, PROJECTDOCSTRING_Reserved) |
| ... | ... | @@ -663,7 +677,8 @@ def _extract_vba (ole, vba_root, project_path, dir_path): |
| 663 | 677 | check_value('PROJECTHELPFILEPATH_Id', 0x0006, PROJECTHELPFILEPATH_Id) |
| 664 | 678 | PROJECTHELPFILEPATH_SizeOfHelpFile1 = struct.unpack("<L", dir_stream.read(4))[0] |
| 665 | 679 | if PROJECTHELPFILEPATH_SizeOfHelpFile1 > 260: |
| 666 | - logging.error("PROJECTHELPFILEPATH_SizeOfHelpFile1 value not in range: {0}".format(PROJECTHELPFILEPATH_SizeOfHelpFile1)) | |
| 680 | + logging.error( | |
| 681 | + "PROJECTHELPFILEPATH_SizeOfHelpFile1 value not in range: {0}".format(PROJECTHELPFILEPATH_SizeOfHelpFile1)) | |
| 667 | 682 | PROJECTHELPFILEPATH_HelpFile1 = dir_stream.read(PROJECTHELPFILEPATH_SizeOfHelpFile1) |
| 668 | 683 | PROJECTHELPFILEPATH_Reserved = struct.unpack("<H", dir_stream.read(2))[0] |
| 669 | 684 | check_value('PROJECTHELPFILEPATH_Reserved', 0x003D, PROJECTHELPFILEPATH_Reserved) |
| ... | ... | @@ -702,7 +717,8 @@ def _extract_vba (ole, vba_root, project_path, dir_path): |
| 702 | 717 | check_value('PROJECTCONSTANTS_Id', 0x000C, PROJECTCONSTANTS_Id) |
| 703 | 718 | PROJECTCONSTANTS_SizeOfConstants = struct.unpack("<L", dir_stream.read(4))[0] |
| 704 | 719 | if PROJECTCONSTANTS_SizeOfConstants > 1015: |
| 705 | - logging.error("PROJECTCONSTANTS_SizeOfConstants value not in range: {0}".format(PROJECTCONSTANTS_SizeOfConstants)) | |
| 720 | + logging.error( | |
| 721 | + "PROJECTCONSTANTS_SizeOfConstants value not in range: {0}".format(PROJECTCONSTANTS_SizeOfConstants)) | |
| 706 | 722 | PROJECTCONSTANTS_Constants = dir_stream.read(PROJECTCONSTANTS_SizeOfConstants) |
| 707 | 723 | PROJECTCONSTANTS_Reserved = struct.unpack("<H", dir_stream.read(2))[0] |
| 708 | 724 | check_value('PROJECTCONSTANTS_Reserved', 0x003C, PROJECTCONSTANTS_Reserved) |
| ... | ... | @@ -740,23 +756,26 @@ def _extract_vba (ole, vba_root, project_path, dir_path): |
| 740 | 756 | if check == 0x002F: |
| 741 | 757 | # REFERENCECONTROL |
| 742 | 758 | REFERENCECONTROL_Id = check |
| 743 | - REFERENCECONTROL_SizeTwiddled = struct.unpack("<L", dir_stream.read(4))[0] # ignore | |
| 759 | + REFERENCECONTROL_SizeTwiddled = struct.unpack("<L", dir_stream.read(4))[0] # ignore | |
| 744 | 760 | REFERENCECONTROL_SizeOfLibidTwiddled = struct.unpack("<L", dir_stream.read(4))[0] |
| 745 | 761 | REFERENCECONTROL_LibidTwiddled = dir_stream.read(REFERENCECONTROL_SizeOfLibidTwiddled) |
| 746 | - REFERENCECONTROL_Reserved1 = struct.unpack("<L", dir_stream.read(4))[0] # ignore | |
| 762 | + REFERENCECONTROL_Reserved1 = struct.unpack("<L", dir_stream.read(4))[0] # ignore | |
| 747 | 763 | check_value('REFERENCECONTROL_Reserved1', 0x0000, REFERENCECONTROL_Reserved1) |
| 748 | - REFERENCECONTROL_Reserved2 = struct.unpack("<H", dir_stream.read(2))[0] # ignore | |
| 764 | + REFERENCECONTROL_Reserved2 = struct.unpack("<H", dir_stream.read(2))[0] # ignore | |
| 749 | 765 | check_value('REFERENCECONTROL_Reserved2', 0x0000, REFERENCECONTROL_Reserved2) |
| 750 | 766 | # optional field |
| 751 | 767 | check2 = struct.unpack("<H", dir_stream.read(2))[0] |
| 752 | 768 | if check2 == 0x0016: |
| 753 | 769 | REFERENCECONTROL_NameRecordExtended_Id = check |
| 754 | 770 | REFERENCECONTROL_NameRecordExtended_SizeofName = struct.unpack("<L", dir_stream.read(4))[0] |
| 755 | - REFERENCECONTROL_NameRecordExtended_Name = dir_stream.read(REFERENCECONTROL_NameRecordExtended_SizeofName) | |
| 771 | + REFERENCECONTROL_NameRecordExtended_Name = dir_stream.read( | |
| 772 | + REFERENCECONTROL_NameRecordExtended_SizeofName) | |
| 756 | 773 | REFERENCECONTROL_NameRecordExtended_Reserved = struct.unpack("<H", dir_stream.read(2))[0] |
| 757 | - check_value('REFERENCECONTROL_NameRecordExtended_Reserved', 0x003E, REFERENCECONTROL_NameRecordExtended_Reserved) | |
| 774 | + check_value('REFERENCECONTROL_NameRecordExtended_Reserved', 0x003E, | |
| 775 | + REFERENCECONTROL_NameRecordExtended_Reserved) | |
| 758 | 776 | REFERENCECONTROL_NameRecordExtended_SizeOfNameUnicode = struct.unpack("<L", dir_stream.read(4))[0] |
| 759 | - REFERENCECONTROL_NameRecordExtended_NameUnicode = dir_stream.read(REFERENCECONTROL_NameRecordExtended_SizeOfNameUnicode) | |
| 777 | + REFERENCECONTROL_NameRecordExtended_NameUnicode = dir_stream.read( | |
| 778 | + REFERENCECONTROL_NameRecordExtended_SizeOfNameUnicode) | |
| 760 | 779 | REFERENCECONTROL_Reserved3 = struct.unpack("<H", dir_stream.read(2))[0] |
| 761 | 780 | else: |
| 762 | 781 | REFERENCECONTROL_Reserved3 = check2 |
| ... | ... | @@ -798,7 +817,7 @@ def _extract_vba (ole, vba_root, project_path, dir_path): |
| 798 | 817 | logging.error('invalid or unknown check Id {0:04X}'.format(check)) |
| 799 | 818 | sys.exit(0) |
| 800 | 819 | |
| 801 | - PROJECTMODULES_Id = check #struct.unpack("<H", dir_stream.read(2))[0] | |
| 820 | + PROJECTMODULES_Id = check #struct.unpack("<H", dir_stream.read(2))[0] | |
| 802 | 821 | check_value('PROJECTMODULES_Id', 0x000F, PROJECTMODULES_Id) |
| 803 | 822 | PROJECTMODULES_Size = struct.unpack("<L", dir_stream.read(4))[0] |
| 804 | 823 | check_value('PROJECTMODULES_Size', 0x0002, PROJECTMODULES_Size) |
| ... | ... | @@ -878,7 +897,7 @@ def _extract_vba (ole, vba_root, project_path, dir_path): |
| 878 | 897 | MODULEPRIVATE_Reserved = struct.unpack("<L", dir_stream.read(4))[0] |
| 879 | 898 | check_value('MODULEPRIVATE_Reserved', 0x0000, MODULEPRIVATE_Reserved) |
| 880 | 899 | section_id = struct.unpack("<H", dir_stream.read(2))[0] |
| 881 | - if section_id == 0x002B: # TERMINATOR | |
| 900 | + if section_id == 0x002B: # TERMINATOR | |
| 882 | 901 | MODULE_Reserved = struct.unpack("<L", dir_stream.read(4))[0] |
| 883 | 902 | check_value('MODULE_Reserved', 0x0000, MODULE_Reserved) |
| 884 | 903 | section_id = None |
| ... | ... | @@ -964,9 +983,9 @@ def detect_autoexec(vba_code, obfuscation=None): |
| 964 | 983 | for keyword in keywords: |
| 965 | 984 | #TODO: if keyword is already a compiled regex, use it as-is |
| 966 | 985 | # search using regex to detect word boundaries: |
| 967 | - if re.search(r'(?i)\b'+keyword+r'\b', vba_code): | |
| 968 | - #if keyword.lower() in vba_code: | |
| 969 | - results.append((keyword, description+obf_text)) | |
| 986 | + if re.search(r'(?i)\b' + keyword + r'\b', vba_code): | |
| 987 | + #if keyword.lower() in vba_code: | |
| 988 | + results.append((keyword, description + obf_text)) | |
| 970 | 989 | return results |
| 971 | 990 | |
| 972 | 991 | |
| ... | ... | @@ -988,9 +1007,9 @@ def detect_suspicious(vba_code, obfuscation=None): |
| 988 | 1007 | for description, keywords in SUSPICIOUS_KEYWORDS.items(): |
| 989 | 1008 | for keyword in keywords: |
| 990 | 1009 | # search using regex to detect word boundaries: |
| 991 | - if re.search(r'(?i)\b'+keyword+r'\b', vba_code): | |
| 992 | - #if keyword.lower() in vba_code: | |
| 993 | - results.append((keyword, description+obf_text)) | |
| 1010 | + if re.search(r'(?i)\b' + keyword + r'\b', vba_code): | |
| 1011 | + #if keyword.lower() in vba_code: | |
| 1012 | + results.append((keyword, description + obf_text)) | |
| 994 | 1013 | return results |
| 995 | 1014 | |
| 996 | 1015 | |
| ... | ... | @@ -1011,7 +1030,7 @@ def detect_patterns(vba_code, obfuscation=None): |
| 1011 | 1030 | for match in pattern_re.finditer(vba_code): |
| 1012 | 1031 | value = match.group() |
| 1013 | 1032 | if value not in found: |
| 1014 | - results.append((pattern_type+obf_text, value)) | |
| 1033 | + results.append((pattern_type + obf_text, value)) | |
| 1015 | 1034 | found.add(value) |
| 1016 | 1035 | return results |
| 1017 | 1036 | |
| ... | ... | @@ -1070,6 +1089,7 @@ def detect_dridex_strings(vba_code): |
| 1070 | 1089 | :return: list of str tuples (encoded string, decoded string) |
| 1071 | 1090 | """ |
| 1072 | 1091 | from thirdparty.DridexUrlDecoder.DridexUrlDecoder import DridexUrlDecode |
| 1092 | + | |
| 1073 | 1093 | results = [] |
| 1074 | 1094 | found = set() |
| 1075 | 1095 | for match in re_dridex_string.finditer(vba_code): |
| ... | ... | @@ -1088,7 +1108,7 @@ def detect_dridex_strings(vba_code): |
| 1088 | 1108 | return results |
| 1089 | 1109 | |
| 1090 | 1110 | |
| 1091 | -class VBA_Scanner (object): | |
| 1111 | +class VBA_Scanner(object): | |
| 1092 | 1112 | """ |
| 1093 | 1113 | Class to scan the source code of a VBA module to find obfuscated strings, |
| 1094 | 1114 | suspicious keywords, IOCs, auto-executable macros, etc. |
| ... | ... | @@ -1125,35 +1145,35 @@ class VBA_Scanner (object): |
| 1125 | 1145 | if 'strreverse' in self.code.lower(): self.strReverse = True |
| 1126 | 1146 | # Then append the decoded strings to the VBA code, to detect obfuscated IOCs and keywords: |
| 1127 | 1147 | for encoded, decoded in self.hex_strings: |
| 1128 | - self.code_hex += '\n'+decoded | |
| 1148 | + self.code_hex += '\n' + decoded | |
| 1129 | 1149 | # if the code contains "StrReverse", also append the hex strings in reverse order: |
| 1130 | 1150 | if self.strReverse: |
| 1131 | 1151 | # StrReverse after hex decoding: |
| 1132 | - self.code_hex_rev += '\n'+decoded[::-1] | |
| 1152 | + self.code_hex_rev += '\n' + decoded[::-1] | |
| 1133 | 1153 | # StrReverse before hex decoding: |
| 1134 | - self.code_rev_hex += '\n'+binascii.unhexlify(encoded[::-1]) | |
| 1154 | + self.code_rev_hex += '\n' + binascii.unhexlify(encoded[::-1]) | |
| 1135 | 1155 | #example: https://malwr.com/analysis/NmFlMGI4YTY1YzYyNDkwNTg1ZTBiZmY5OGI3YjlhYzU/ |
| 1136 | 1156 | #TODO: also append the full code reversed if StrReverse? (risk of false positives?) |
| 1137 | 1157 | # Detect Base64-encoded strings |
| 1138 | 1158 | self.base64_strings = detect_base64_strings(self.code) |
| 1139 | 1159 | for encoded, decoded in self.base64_strings: |
| 1140 | - self.code_base64 += '\n'+decoded | |
| 1160 | + self.code_base64 += '\n' + decoded | |
| 1141 | 1161 | # Detect Dridex-encoded strings |
| 1142 | 1162 | self.dridex_strings = detect_dridex_strings(self.code) |
| 1143 | 1163 | for encoded, decoded in self.dridex_strings: |
| 1144 | - self.code_dridex += '\n'+decoded | |
| 1164 | + self.code_dridex += '\n' + decoded | |
| 1145 | 1165 | results = [] |
| 1146 | 1166 | self.autoexec_keywords = [] |
| 1147 | 1167 | self.suspicious_keywords = [] |
| 1148 | 1168 | self.iocs = [] |
| 1149 | 1169 | |
| 1150 | 1170 | for code, obfuscation in ( |
| 1151 | - (self.code, None), | |
| 1152 | - (self.code_hex, 'Hex'), | |
| 1153 | - (self.code_hex_rev, 'Hex+StrReverse'), | |
| 1154 | - (self.code_rev_hex, 'StrReverse+Hex'), | |
| 1155 | - (self.code_base64, 'Base64'), | |
| 1156 | - (self.code_dridex, 'Dridex'), | |
| 1171 | + (self.code, None), | |
| 1172 | + (self.code_hex, 'Hex'), | |
| 1173 | + (self.code_hex_rev, 'Hex+StrReverse'), | |
| 1174 | + (self.code_rev_hex, 'StrReverse+Hex'), | |
| 1175 | + (self.code_base64, 'Base64'), | |
| 1176 | + (self.code_dridex, 'Dridex'), | |
| 1157 | 1177 | ): |
| 1158 | 1178 | self.autoexec_keywords += detect_autoexec(code, obfuscation) |
| 1159 | 1179 | self.suspicious_keywords += detect_suspicious(code, obfuscation) |
| ... | ... | @@ -1162,13 +1182,13 @@ class VBA_Scanner (object): |
| 1162 | 1182 | # If hex-encoded strings were discovered, add an item to suspicious keywords: |
| 1163 | 1183 | if self.hex_strings: |
| 1164 | 1184 | self.suspicious_keywords.append(('Hex Strings', |
| 1165 | - 'Hex-encoded strings were detected, may be used to obfuscate strings (option --decode to see all)')) | |
| 1185 | + 'Hex-encoded strings were detected, may be used to obfuscate strings (option --decode to see all)')) | |
| 1166 | 1186 | if self.base64_strings: |
| 1167 | 1187 | self.suspicious_keywords.append(('Base64 Strings', |
| 1168 | - 'Base64-encoded strings were detected, may be used to obfuscate strings (option --decode to see all)')) | |
| 1188 | + 'Base64-encoded strings were detected, may be used to obfuscate strings (option --decode to see all)')) | |
| 1169 | 1189 | if self.dridex_strings: |
| 1170 | 1190 | self.suspicious_keywords.append(('Dridex Strings', |
| 1171 | - 'Dridex-encoded strings were detected, may be used to obfuscate strings (option --decode to see all)')) | |
| 1191 | + 'Dridex-encoded strings were detected, may be used to obfuscate strings (option --decode to see all)')) | |
| 1172 | 1192 | for keyword, description in self.autoexec_keywords: |
| 1173 | 1193 | results.append(('AutoExec', keyword, description)) |
| 1174 | 1194 | for keyword, description in self.suspicious_keywords: |
| ... | ... | @@ -1195,9 +1215,8 @@ class VBA_Scanner (object): |
| 1195 | 1215 | """ |
| 1196 | 1216 | self.scan() |
| 1197 | 1217 | return (len(self.autoexec_keywords), len(self.suspicious_keywords), |
| 1198 | - len(self.iocs), len(self.hex_strings), len(self.base64_strings), | |
| 1199 | - len(self.dridex_strings)) | |
| 1200 | - | |
| 1218 | + len(self.iocs), len(self.hex_strings), len(self.base64_strings), | |
| 1219 | + len(self.dridex_strings)) | |
| 1201 | 1220 | |
| 1202 | 1221 | |
| 1203 | 1222 | def scan_vba(vba_code, include_decoded_strings): |
| ... | ... | @@ -1265,11 +1284,12 @@ class VBA_Parser(object): |
| 1265 | 1284 | # self.filename = '<file-like object>' |
| 1266 | 1285 | if olefile.isOleFile(_file): |
| 1267 | 1286 | # This looks like an OLE file |
| 1268 | - logging.info('Parsing OLE file %s' % self.filename) | |
| 1287 | + logging.info('Opening OLE file %s' % self.filename) | |
| 1269 | 1288 | # Open and parse the OLE file, using unicode for path names: |
| 1270 | - self.ole_file = olefile.OleFileIO(_file, path_encoding=None) | |
| 1271 | 1289 | self.type = TYPE_OLE |
| 1272 | - #TODO: raise TypeError if this is a Powerpoint 97 file, since VBA macros cannot be detected yet | |
| 1290 | + # TODO: handle OLE parsing exceptions | |
| 1291 | + self.ole_file = olefile.OleFileIO(_file, path_encoding=None) | |
| 1292 | + # TODO: raise TypeError if this is a Powerpoint 97 file, since VBA macros cannot be detected yet | |
| 1273 | 1293 | elif zipfile.is_zipfile(_file): |
| 1274 | 1294 | # This looks like a zip file, need to look for vbaProject.bin inside |
| 1275 | 1295 | # It can be any OLE file inside the archive |
| ... | ... | @@ -1279,7 +1299,7 @@ class VBA_Parser(object): |
| 1279 | 1299 | self.type = TYPE_OpenXML |
| 1280 | 1300 | z = zipfile.ZipFile(_file) |
| 1281 | 1301 | #TODO: check if this is actually an OpenXML file |
| 1282 | - #TODO: if the zip file is encrypted, suggest to use the -z option, or try '-z infected' automatically? | |
| 1302 | + #TODO: if the zip file is encrypted, suggest using the -z option, or try '-z infected' automatically | |
| 1283 | 1303 | # check each file within the zip if it is an OLE file, by reading its magic: |
| 1284 | 1304 | for subfile in z.namelist(): |
| 1285 | 1305 | magic = z.open(subfile).read(len(olefile.MAGIC)) |
| ... | ... | @@ -1297,64 +1317,90 @@ class VBA_Parser(object): |
| 1297 | 1317 | # or a plain text file containing VBA code |
| 1298 | 1318 | if data is None: |
| 1299 | 1319 | data = open(filename, 'rb').read() |
| 1320 | + # store a lowercase version for some tests: | |
| 1321 | + data_lowercase = data.lower() | |
| 1300 | 1322 | # TODO: move each format parser to a separate method |
| 1301 | 1323 | # check if it is a Word 2003 XML file (WordProcessingML): must contain the namespace |
| 1302 | 1324 | if 'http://schemas.microsoft.com/office/word/2003/wordml' in data: |
| 1303 | 1325 | logging.info('Opening Word 2003 XML file %s' % self.filename) |
| 1304 | - self.type = TYPE_Word2003_XML | |
| 1305 | - # parse the XML content | |
| 1306 | - et = ET.fromstring(data) | |
| 1307 | - # find all the binData elements: | |
| 1308 | - for bindata in et.getiterator(TAG_BINDATA): | |
| 1309 | - # the binData content is an OLE container for the VBA project, compressed | |
| 1310 | - # using the ActiveMime/MSO format (zlib-compressed), and Base64 encoded. | |
| 1311 | - # get the filename: | |
| 1312 | - fname = bindata.get(ATTR_NAME, 'noname.mso') | |
| 1313 | - # decode the base64 activemime | |
| 1314 | - activemime = binascii.a2b_base64(bindata.text) | |
| 1315 | - # decompress the zlib data starting at offset 0x32, which is the OLE container: | |
| 1316 | - ole_data = zlib.decompress(activemime[0x32:]) | |
| 1317 | - try: | |
| 1318 | - self.ole_subfiles.append(VBA_Parser(filename=fname, data=ole_data)) | |
| 1319 | - except: | |
| 1320 | - logging.debug('%s is not a valid OLE file' % fname) | |
| 1321 | - continue | |
| 1322 | - # check if it is a Word 2003 XML file (WordProcessingML): must contain the namespace | |
| 1323 | - # TODO: check if Word accepts data before the MIME header, if is case-sensitive, etc. | |
| 1324 | - elif data.lower().startswith('mime-version:'): | |
| 1325 | - logging.info('Opening Word MHTML file %s' % self.filename) | |
| 1326 | - self.type = TYPE_MHTML | |
| 1327 | - # parse the MIME content | |
| 1328 | - mhtml = email.message_from_string(data) | |
| 1329 | - # find all the attached files: | |
| 1330 | - for part in mhtml.walk(): | |
| 1331 | - content_type = part.get_content_type() # always returns a value | |
| 1332 | - fname = part.get_filename(None) # returns None if it fails | |
| 1333 | - logging.debug('MHTML part: filename=%r, content-type=%r' % (fname, content_type)) | |
| 1334 | - part_data = part.get_payload(decode=True) | |
| 1335 | - # VBA macros are stored in a binary file named "editdata.mso". | |
| 1336 | - # the data content is an OLE container for the VBA project, compressed | |
| 1337 | - # using the ActiveMime/MSO format (zlib-compressed), and Base64 encoded. | |
| 1338 | - # decompress the zlib data starting at offset 0x32, which is the OLE container: | |
| 1339 | - try: | |
| 1340 | - ole_data = zlib.decompress(part_data[0x32:]) | |
| 1341 | - except: | |
| 1342 | - logging.debug('%s is not an ActiveMime container' % fname) | |
| 1343 | - continue | |
| 1344 | - try: | |
| 1345 | - self.ole_subfiles.append(VBA_Parser(filename=fname, data=ole_data)) | |
| 1346 | - except: | |
| 1347 | - logging.debug('%s is not a valid OLE file' % fname) | |
| 1348 | - continue | |
| 1349 | - #TODO: handle exceptions | |
| 1350 | - #TODO: Excel 2003 XML | |
| 1351 | - #TODO: plain text VBA file | |
| 1352 | - else: | |
| 1353 | - msg = '%s is not an OLE nor an OpenXML file, cannot extract VBA Macros.' % self.filename | |
| 1354 | - logging.error(msg) | |
| 1355 | - raise TypeError(msg) | |
| 1356 | - | |
| 1357 | - def find_vba_projects (self): | |
| 1326 | + try: | |
| 1327 | + # parse the XML content | |
| 1328 | + # TODO: handle XML parsing exceptions | |
| 1329 | + et = ET.fromstring(data) | |
| 1330 | + # set type only if parsing succeeds | |
| 1331 | + self.type = TYPE_Word2003_XML | |
| 1332 | + # find all the binData elements: | |
| 1333 | + for bindata in et.getiterator(TAG_BINDATA): | |
| 1334 | + # the binData content is an OLE container for the VBA project, compressed | |
| 1335 | + # using the ActiveMime/MSO format (zlib-compressed), and Base64 encoded. | |
| 1336 | + # get the filename: | |
| 1337 | + fname = bindata.get(ATTR_NAME, 'noname.mso') | |
| 1338 | + # decode the base64 activemime | |
| 1339 | + activemime = binascii.a2b_base64(bindata.text) | |
| 1340 | + # decompress the zlib data starting at offset 0x32, which is the OLE container: | |
| 1341 | + # TODO: handle different offsets => separate function | |
| 1342 | + ole_data = zlib.decompress(activemime[0x32:]) | |
| 1343 | + try: | |
| 1344 | + self.ole_subfiles.append(VBA_Parser(filename=fname, data=ole_data)) | |
| 1345 | + except: | |
| 1346 | + logging.debug('%s is not a valid OLE file' % fname) | |
| 1347 | + continue | |
| 1348 | + except: | |
| 1349 | + logging.exception('Failed XML parsing for file %r' % self.filename) | |
| 1350 | + pass | |
| 1351 | + # check if it is an MHT file (MIME HTML, Word or Excel saved as "Single File Web Page"): | |
| 1352 | + # According to my tests, these files usually start with "MIME-Version: 1.0" on the 1st line | |
| 1353 | + # BUT Word accepts a blank line or other MIME headers inserted before, | |
| 1354 | + # and even whitespace between "MIME", "-", "Version" and ":". The version number is ignored. | |
| 1355 | + # And the line is case-insensitive. | |
| 1356 | + # so we'll just check the presence of mime, version and multipart anywhere: | |
| 1357 | + if self.type is None and 'mime' in data_lowercase and 'version' in data_lowercase and 'multipart' in data_lowercase: | |
| 1358 | + logging.info('Opening MHTML file %s' % self.filename) | |
| 1359 | + try: | |
| 1360 | + # parse the MIME content | |
| 1361 | + # remove any leading whitespace or newline (workaround for issue in email package) | |
| 1362 | + stripped_data = data.lstrip('\r\n\t ') | |
| 1363 | + mhtml = email.message_from_string(stripped_data) | |
| 1364 | + self.type = TYPE_MHTML | |
| 1365 | + # find all the attached files: | |
| 1366 | + for part in mhtml.walk(): | |
| 1367 | + content_type = part.get_content_type() # always returns a value | |
| 1368 | + fname = part.get_filename(None) # returns None if it fails | |
| 1369 | + logging.debug('MHTML part: filename=%r, content-type=%r' % (fname, content_type)) | |
| 1370 | + part_data = part.get_payload(decode=True) | |
| 1371 | + # VBA macros are stored in a binary file named "editdata.mso". | |
| 1372 | + # the data content is an OLE container for the VBA project, compressed | |
| 1373 | + # using the ActiveMime/MSO format (zlib-compressed), and Base64 encoded. | |
| 1374 | + # decompress the zlib data starting at offset 0x32, which is the OLE container: | |
| 1375 | + # check ActiveMime header: | |
| 1376 | + if isinstance(part_data, str) and part_data.startswith(MSO_ACTIVEMIME_HEADER): | |
| 1377 | + logging.debug('Found ActiveMime header, decompressing MSO container') | |
| 1378 | + try: | |
| 1379 | + ole_data = zlib.decompress(part_data[0x32:]) | |
| 1380 | + try: | |
| 1381 | + # TODO: check if it is actually an OLE file | |
| 1382 | + # TODO: get the MSO filename from content_location? | |
| 1383 | + self.ole_subfiles.append(VBA_Parser(filename=fname, data=ole_data)) | |
| 1384 | + except: | |
| 1385 | + logging.debug('%s is not a valid OLE file' % fname) | |
| 1386 | + except: | |
| 1387 | + logging.error('Failed decompressing an MSO container in %r - %s' | |
| 1388 | + % (fname, MSG_OLEVBA_ISSUES)) | |
| 1389 | + # TODO: bug here - need to split into smaller functions/classes? | |
| 1390 | + except: | |
| 1391 | + logging.exception('Failed MIME parsing for file %r - %s' | |
| 1392 | + % (self.filename, MSG_OLEVBA_ISSUES)) | |
| 1393 | + pass | |
| 1394 | + | |
| 1395 | + #TODO: handle exceptions | |
| 1396 | + #TODO: Excel 2003 XML | |
| 1397 | + #TODO: plain text VBA file | |
| 1398 | + if self.type is None: | |
| 1399 | + msg = '%s is not a supported file type, cannot extract VBA Macros.' % self.filename | |
| 1400 | + logging.error(msg) | |
| 1401 | + raise TypeError(msg) | |
| 1402 | + | |
| 1403 | + def find_vba_projects(self): | |
| 1358 | 1404 | """ |
| 1359 | 1405 | Finds all the VBA projects stored in an OLE file. |
| 1360 | 1406 | |
| ... | ... | @@ -1465,7 +1511,7 @@ class VBA_Parser(object): |
| 1465 | 1511 | return True |
| 1466 | 1512 | |
| 1467 | 1513 | |
| 1468 | - def extract_macros (self): | |
| 1514 | + def extract_macros(self): | |
| 1469 | 1515 | """ |
| 1470 | 1516 | Extract and decompress source code for each VBA macro found in the file |
| 1471 | 1517 | |
| ... | ... | @@ -1482,7 +1528,8 @@ class VBA_Parser(object): |
| 1482 | 1528 | self.find_vba_projects() |
| 1483 | 1529 | for vba_root, project_path, dir_path in self.vba_projects: |
| 1484 | 1530 | # extract all VBA macros from that VBA root storage: |
| 1485 | - for stream_path, vba_filename, vba_code in _extract_vba(self.ole_file, vba_root, project_path, dir_path): | |
| 1531 | + for stream_path, vba_filename, vba_code in _extract_vba(self.ole_file, vba_root, project_path, | |
| 1532 | + dir_path): | |
| 1486 | 1533 | yield (self.filename, stream_path, vba_filename, vba_code) |
| 1487 | 1534 | |
| 1488 | 1535 | |
| ... | ... | @@ -1520,8 +1567,7 @@ def print_analysis(vba_code, show_decoded_strings=False): |
| 1520 | 1567 | print 'No suspicious keyword or IOC found.' |
| 1521 | 1568 | |
| 1522 | 1569 | |
| 1523 | - | |
| 1524 | -def process_file (container, filename, data, show_decoded_strings=False): | |
| 1570 | +def process_file(container, filename, data, show_decoded_strings=False): | |
| 1525 | 1571 | """ |
| 1526 | 1572 | Process a single file |
| 1527 | 1573 | |
| ... | ... | @@ -1536,7 +1582,7 @@ def process_file (container, filename, data, show_decoded_strings=False): |
| 1536 | 1582 | display_filename = '%s in %s' % (filename, container) |
| 1537 | 1583 | else: |
| 1538 | 1584 | display_filename = filename |
| 1539 | - print '='*79 | |
| 1585 | + print '=' * 79 | |
| 1540 | 1586 | print 'FILE:', display_filename |
| 1541 | 1587 | try: |
| 1542 | 1588 | #TODO: handle olefile errors, when an OLE file is malformed |
| ... | ... | @@ -1548,22 +1594,22 @@ def process_file (container, filename, data, show_decoded_strings=False): |
| 1548 | 1594 | # hide attribute lines: |
| 1549 | 1595 | #TODO: option to disable attribute filtering |
| 1550 | 1596 | vba_code_filtered = filter_vba(vba_code) |
| 1551 | - print '-'*79 | |
| 1597 | + print '-' * 79 | |
| 1552 | 1598 | print 'VBA MACRO %s ' % vba_filename |
| 1553 | 1599 | print 'in file: %s - OLE stream: %s' % (subfilename, repr(stream_path)) |
| 1554 | - print '- '*39 | |
| 1600 | + print '- ' * 39 | |
| 1555 | 1601 | # detect empty macros: |
| 1556 | 1602 | if vba_code_filtered.strip() == '': |
| 1557 | 1603 | print '(empty macro)' |
| 1558 | 1604 | else: |
| 1559 | 1605 | print vba_code_filtered |
| 1560 | - print '- '*39 | |
| 1606 | + print '- ' * 39 | |
| 1561 | 1607 | print 'ANALYSIS:' |
| 1562 | 1608 | # analyse the whole code, filtered to avoid false positives: |
| 1563 | 1609 | print_analysis(vba_code_filtered, show_decoded_strings) |
| 1564 | 1610 | else: |
| 1565 | 1611 | print 'No VBA macros found.' |
| 1566 | - except: #TypeError: | |
| 1612 | + except: #TypeError: | |
| 1567 | 1613 | #raise |
| 1568 | 1614 | #TODO: print more info if debug mode |
| 1569 | 1615 | #print sys.exc_value |
| ... | ... | @@ -1572,7 +1618,7 @@ def process_file (container, filename, data, show_decoded_strings=False): |
| 1572 | 1618 | print '' |
| 1573 | 1619 | |
| 1574 | 1620 | |
| 1575 | -def process_file_triage (container, filename, data): | |
| 1621 | +def process_file_triage(container, filename, data): | |
| 1576 | 1622 | """ |
| 1577 | 1623 | Process a single file |
| 1578 | 1624 | |
| ... | ... | @@ -1624,7 +1670,7 @@ def process_file_triage (container, filename, data): |
| 1624 | 1670 | if nb_base64strings: base64obf = 'B' |
| 1625 | 1671 | if nb_dridexstrings: dridex = 'D' |
| 1626 | 1672 | flags += '%s%s%s%s%s%s%s' % (macros, autoexec, suspicious, iocs, hexstrings, |
| 1627 | - base64obf, dridex) | |
| 1673 | + base64obf, dridex) | |
| 1628 | 1674 | |
| 1629 | 1675 | # macros = autoexec = suspicious = iocs = hexstrings = 'no' |
| 1630 | 1676 | # if nb_macros: macros = 'YES:%d' % nb_macros |
| ... | ... | @@ -1663,9 +1709,11 @@ def process_file_triage (container, filename, data): |
| 1663 | 1709 | # t.add_row((filename, ftype, macros, autoexec, suspicious, iocs, hexstrings)) |
| 1664 | 1710 | # print t |
| 1665 | 1711 | |
| 1712 | + | |
| 1666 | 1713 | def main_triage_quick(): |
| 1667 | 1714 | pass |
| 1668 | 1715 | |
| 1716 | + | |
| 1669 | 1717 | #=== MAIN ===================================================================== |
| 1670 | 1718 | |
| 1671 | 1719 | def main(): |
| ... | ... | @@ -1679,19 +1727,19 @@ def main(): |
| 1679 | 1727 | # parser.add_option('-c', '--csv', dest='csv', |
| 1680 | 1728 | # help='export results to a CSV file') |
| 1681 | 1729 | parser.add_option("-r", action="store_true", dest="recursive", |
| 1682 | - help='find files recursively in subdirectories.') | |
| 1730 | + help='find files recursively in subdirectories.') | |
| 1683 | 1731 | parser.add_option("-z", "--zip", dest='zip_password', type='str', default=None, |
| 1684 | - help='if the file is a zip archive, open first file from it, using the provided password (requires Python 2.6+)') | |
| 1732 | + help='if the file is a zip archive, open first file from it, using the provided password (requires Python 2.6+)') | |
| 1685 | 1733 | parser.add_option("-f", "--zipfname", dest='zip_fname', type='str', default='*', |
| 1686 | - help='if the file is a zip archive, file(s) to be opened within the zip. Wildcards * and ? are supported. (default:*)') | |
| 1734 | + help='if the file is a zip archive, file(s) to be opened within the zip. Wildcards * and ? are supported. (default:*)') | |
| 1687 | 1735 | parser.add_option("-t", action="store_true", dest="triage_mode", |
| 1688 | - help='triage mode, display results as a summary table (default for multiple files)') | |
| 1736 | + help='triage mode, display results as a summary table (default for multiple files)') | |
| 1689 | 1737 | parser.add_option("-d", action="store_true", dest="detailed_mode", |
| 1690 | - help='detailed mode, display full results (default for single file)') | |
| 1738 | + help='detailed mode, display full results (default for single file)') | |
| 1691 | 1739 | parser.add_option("-i", "--input", dest='input', type='str', default=None, |
| 1692 | - help='input file containing VBA source code to be analyzed (no parsing)') | |
| 1740 | + help='input file containing VBA source code to be analyzed (no parsing)') | |
| 1693 | 1741 | parser.add_option("--decode", action="store_true", dest="show_decoded_strings", |
| 1694 | - help='display all the obfuscated strings with their decoded content (Hex, Base64, StrReverse, Dridex).') | |
| 1742 | + help='display all the obfuscated strings with their decoded content (Hex, Base64, StrReverse, Dridex).') | |
| 1695 | 1743 | |
| 1696 | 1744 | (options, args) = parser.parse_args() |
| 1697 | 1745 | |
| ... | ... | @@ -1705,9 +1753,9 @@ def main(): |
| 1705 | 1753 | print 'olevba %s - http://decalage.info/python/oletools' % __version__ |
| 1706 | 1754 | |
| 1707 | 1755 | # TODO: option to set logging level, none by default |
| 1708 | - logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.WARNING) #INFO) | |
| 1756 | + logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.DEBUG) #.WARNING) #INFO) | |
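| 1709 | 1757 | # For now, all logging is disabled (NOTE: in this revision the disable call below is commented out and the level above is DEBUG, so logging is active): |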
| 1710 | - logging.disable(logging.CRITICAL) | |
| 1758 | + #logging.disable(logging.CRITICAL) | |
| 1711 | 1759 | |
| 1712 | 1760 | if options.input: |
| 1713 | 1761 | # input file provided with VBA source code to be analyzed directly: |
| ... | ... | @@ -1720,12 +1768,12 @@ def main(): |
| 1720 | 1768 | # print '%-8s %-7s %-7s %-7s %-7s %-7s' % ('-'*8, '-'*7, '-'*7, '-'*7, '-'*7, '-'*7) |
| 1721 | 1769 | if not options.detailed_mode or options.triage_mode: |
| 1722 | 1770 | print '%-11s %-65s' % ('Flags', 'Filename') |
| 1723 | - print '%-11s %-65s' % ('-'*11, '-'*65) | |
| 1771 | + print '%-11s %-65s' % ('-' * 11, '-' * 65) | |
| 1724 | 1772 | previous_container = None |
| 1725 | 1773 | count = 0 |
| 1726 | 1774 | container = filename = data = None |
| 1727 | 1775 | for container, filename, data in xglob.iter_files(args, recursive=options.recursive, |
| 1728 | - zip_password=options.zip_password, zip_fname=options.zip_fname): | |
| 1776 | + zip_password=options.zip_password, zip_fname=options.zip_fname): | |
| 1729 | 1777 | # ignore directory names stored in zip files: |
| 1730 | 1778 | if container and filename.endswith('/'): |
| 1731 | 1779 | continue |
| ... | ... | @@ -1749,7 +1797,8 @@ def main(): |
| 1749 | 1797 | #TODO: avoid doing the analysis twice by storing results |
| 1750 | 1798 | process_file(container, filename, data, show_decoded_strings=options.show_decoded_strings) |
| 1751 | 1799 | |
| 1800 | + | |
| 1752 | 1801 | if __name__ == '__main__': |
| 1753 | 1802 | main() |
| 1754 | 1803 | |
| 1755 | -# This was coded while listening to "Dust" from I Love You But I've Chosen Darkness | |
| 1756 | 1804 | \ No newline at end of file |
| 1805 | + # This was coded while listening to "Dust" from I Love You But I've Chosen Darkness | |
| 1757 | 1806 | \ No newline at end of file | ... | ... |