Commit bdad8c146df8e66815562272b6a00f980a359e0e
1 parent: fad632c5
improved support for MHTML files with modified header: fixed issue #11
Showing 1 changed file with 217 additions and 168 deletions
oletools/olevba.py
| @@ -23,7 +23,7 @@ olevba is based on source code from officeparser by John William Davison | @@ -23,7 +23,7 @@ olevba is based on source code from officeparser by John William Davison | ||
| 23 | https://github.com/unixfreak0037/officeparser | 23 | https://github.com/unixfreak0037/officeparser |
| 24 | """ | 24 | """ |
| 25 | 25 | ||
| 26 | -#=== LICENSE ================================================================== | 26 | +# === LICENSE ================================================================== |
| 27 | 27 | ||
| 28 | # olevba is copyright (c) 2014-2015 Philippe Lagadec (http://www.decalage.info) | 28 | # olevba is copyright (c) 2014-2015 Philippe Lagadec (http://www.decalage.info) |
| 29 | # All rights reserved. | 29 | # All rights reserved. |
| @@ -130,8 +130,10 @@ https://github.com/unixfreak0037/officeparser | @@ -130,8 +130,10 @@ https://github.com/unixfreak0037/officeparser | ||
| 130 | # virtualisation detection | 130 | # virtualisation detection |
| 131 | # 2015-05-06 v0.27 PL: - added support for MHTML files with VBA macros | 131 | # 2015-05-06 v0.27 PL: - added support for MHTML files with VBA macros |
| 132 | # (issue #10 reported by Greg from SpamStopsHere) | 132 | # (issue #10 reported by Greg from SpamStopsHere) |
| 133 | +# 2015-05-24 v0.28 PL: - improved support for MHTML files with modified header | ||
| 134 | +# (issue #11 reported by Thomas Chopitea) | ||
| 133 | 135 | ||
| 134 | -__version__ = '0.27' | 136 | +__version__ = '0.28' |
| 135 | 137 | ||
| 136 | #------------------------------------------------------------------------------ | 138 | #------------------------------------------------------------------------------ |
| 137 | # TODO: | 139 | # TODO: |
| @@ -178,7 +180,7 @@ import binascii | @@ -178,7 +180,7 @@ import binascii | ||
| 178 | import base64 | 180 | import base64 |
| 179 | import traceback | 181 | import traceback |
| 180 | import zlib | 182 | import zlib |
| 181 | -import email # for MHTML parsing | 183 | +import email # for MHTML parsing |
| 182 | 184 | ||
| 183 | # import lxml or ElementTree for XML parsing: | 185 | # import lxml or ElementTree for XML parsing: |
| 184 | try: | 186 | try: |
| @@ -193,9 +195,9 @@ except ImportError: | @@ -193,9 +195,9 @@ except ImportError: | ||
| 193 | # Python <2.5: standalone ElementTree install | 195 | # Python <2.5: standalone ElementTree install |
| 194 | import elementtree.cElementTree as ET | 196 | import elementtree.cElementTree as ET |
| 195 | except ImportError: | 197 | except ImportError: |
| 196 | - raise ImportError, "lxml or ElementTree are not installed, "\ | ||
| 197 | - +"see http://codespeak.net/lxml "\ | ||
| 198 | - +"or http://effbot.org/zone/element-index.htm" | 198 | + raise ImportError, "lxml or ElementTree are not installed, " \ |
| 199 | + + "see http://codespeak.net/lxml " \ | ||
| 200 | + + "or http://effbot.org/zone/element-index.htm" | ||
| 199 | 201 | ||
| 200 | import thirdparty.olefile as olefile | 202 | import thirdparty.olefile as olefile |
| 201 | from thirdparty.prettytable import prettytable | 203 | from thirdparty.prettytable import prettytable |
| @@ -203,12 +205,19 @@ from thirdparty.xglob import xglob | @@ -203,12 +205,19 @@ from thirdparty.xglob import xglob | ||
| 203 | 205 | ||
| 204 | #--- CONSTANTS ---------------------------------------------------------------- | 206 | #--- CONSTANTS ---------------------------------------------------------------- |
| 205 | 207 | ||
| 208 | +# URL and message to report issues: | ||
| 209 | +URL_OLEVBA_ISSUES = 'https://bitbucket.org/decalage/oletools/issues' | ||
| 210 | +MSG_OLEVBA_ISSUES = 'Please report this issue on %s' % URL_OLEVBA_ISSUES | ||
| 211 | + | ||
| 206 | # Container types: | 212 | # Container types: |
| 207 | -TYPE_OLE = 'OLE' | 213 | +TYPE_OLE = 'OLE' |
| 208 | TYPE_OpenXML = 'OpenXML' | 214 | TYPE_OpenXML = 'OpenXML' |
| 209 | TYPE_Word2003_XML = 'Word2003_XML' | 215 | TYPE_Word2003_XML = 'Word2003_XML' |
| 210 | TYPE_MHTML = 'MHTML' | 216 | TYPE_MHTML = 'MHTML' |
| 211 | 217 | ||
| 218 | +# MSO files ActiveMime header magic | ||
| 219 | +MSO_ACTIVEMIME_HEADER = 'ActiveMime' | ||
| 220 | + | ||
| 212 | MODULE_EXTENSION = "bas" | 221 | MODULE_EXTENSION = "bas" |
| 213 | CLASS_EXTENSION = "cls" | 222 | CLASS_EXTENSION = "cls" |
| 214 | FORM_EXTENSION = "frm" | 223 | FORM_EXTENSION = "frm" |
| @@ -249,28 +258,28 @@ SUSPICIOUS_KEYWORDS = { | @@ -249,28 +258,28 @@ SUSPICIOUS_KEYWORDS = { | ||
| 249 | 'May open a file': | 258 | 'May open a file': |
| 250 | ('Open',), | 259 | ('Open',), |
| 251 | 'May write to a file (if combined with Open)': | 260 | 'May write to a file (if combined with Open)': |
| 252 | - #TODO: regex to find Open+Write on same line | 261 | + #TODO: regex to find Open+Write on same line |
| 253 | ('Write', 'Put', 'Output', 'Print #'), | 262 | ('Write', 'Put', 'Output', 'Print #'), |
| 254 | 'May read or write a binary file (if combined with Open)': | 263 | 'May read or write a binary file (if combined with Open)': |
| 255 | - #TODO: regex to find Open+Binary on same line | 264 | + #TODO: regex to find Open+Binary on same line |
| 256 | ('Binary',), | 265 | ('Binary',), |
| 257 | 'May copy a file': | 266 | 'May copy a file': |
| 258 | ('FileCopy', 'CopyFile'), | 267 | ('FileCopy', 'CopyFile'), |
| 259 | - #FileCopy: http://msdn.microsoft.com/en-us/library/office/gg264390%28v=office.15%29.aspx | ||
| 260 | - #CopyFile: http://msdn.microsoft.com/en-us/library/office/gg264089%28v=office.15%29.aspx | 268 | + #FileCopy: http://msdn.microsoft.com/en-us/library/office/gg264390%28v=office.15%29.aspx |
| 269 | + #CopyFile: http://msdn.microsoft.com/en-us/library/office/gg264089%28v=office.15%29.aspx | ||
| 261 | 'May delete a file': | 270 | 'May delete a file': |
| 262 | ('Kill',), | 271 | ('Kill',), |
| 263 | 'May create a text file': | 272 | 'May create a text file': |
| 264 | - ('CreateTextFile','ADODB.Stream', 'WriteText', 'SaveToFile'), | ||
| 265 | - #CreateTextFile: http://msdn.microsoft.com/en-us/library/office/gg264617%28v=office.15%29.aspx | ||
| 266 | - #ADODB.Stream sample: http://pastebin.com/Z4TMyuq6 | 273 | + ('CreateTextFile', 'ADODB.Stream', 'WriteText', 'SaveToFile'), |
| 274 | + #CreateTextFile: http://msdn.microsoft.com/en-us/library/office/gg264617%28v=office.15%29.aspx | ||
| 275 | + #ADODB.Stream sample: http://pastebin.com/Z4TMyuq6 | ||
| 267 | 'May run an executable file or a system command': | 276 | 'May run an executable file or a system command': |
| 268 | ('Shell', 'vbNormal', 'vbNormalFocus', 'vbHide', 'vbMinimizedFocus', 'vbMaximizedFocus', 'vbNormalNoFocus', | 277 | ('Shell', 'vbNormal', 'vbNormalFocus', 'vbHide', 'vbMinimizedFocus', 'vbMaximizedFocus', 'vbNormalNoFocus', |
| 269 | 'vbMinimizedNoFocus', 'WScript.Shell', 'Run'), | 278 | 'vbMinimizedNoFocus', 'WScript.Shell', 'Run'), |
| 270 | - #Shell: http://msdn.microsoft.com/en-us/library/office/gg278437%28v=office.15%29.aspx | ||
| 271 | - #WScript.Shell+Run sample: http://pastebin.com/Z4TMyuq6 | 279 | + #Shell: http://msdn.microsoft.com/en-us/library/office/gg278437%28v=office.15%29.aspx |
| 280 | + #WScript.Shell+Run sample: http://pastebin.com/Z4TMyuq6 | ||
| 272 | 'May run PowerShell commands': | 281 | 'May run PowerShell commands': |
| 273 | - #sample: https://malwr.com/analysis/M2NjZWNmMjA0YjVjNGVhYmJlZmFhNWY4NmQxZDllZTY/ | 282 | + #sample: https://malwr.com/analysis/M2NjZWNmMjA0YjVjNGVhYmJlZmFhNWY4NmQxZDllZTY/ |
| 274 | ('PowerShell', ), | 283 | ('PowerShell', ), |
| 275 | 'May hide the application': | 284 | 'May hide the application': |
| 276 | ('Application.Visible', 'ShowWindow', 'SW_HIDE'), | 285 | ('Application.Visible', 'ShowWindow', 'SW_HIDE'), |
| @@ -279,7 +288,7 @@ SUSPICIOUS_KEYWORDS = { | @@ -279,7 +288,7 @@ SUSPICIOUS_KEYWORDS = { | ||
| 279 | 'May save the current workbook': | 288 | 'May save the current workbook': |
| 280 | ('ActiveWorkbook.SaveAs',), | 289 | ('ActiveWorkbook.SaveAs',), |
| 281 | 'May change which directory contains files to open at startup': | 290 | 'May change which directory contains files to open at startup': |
| 282 | - #TODO: confirm the actual effect | 291 | + #TODO: confirm the actual effect |
| 283 | ('Application.AltStartupPath',), | 292 | ('Application.AltStartupPath',), |
| 284 | 'May create an OLE object': | 293 | 'May create an OLE object': |
| 285 | ('CreateObject',), | 294 | ('CreateObject',), |
| @@ -288,58 +297,58 @@ SUSPICIOUS_KEYWORDS = { | @@ -288,58 +297,58 @@ SUSPICIOUS_KEYWORDS = { | ||
| 288 | 'May enumerate application windows (if combined with Shell.Application object)': | 297 | 'May enumerate application windows (if combined with Shell.Application object)': |
| 289 | ('Windows', 'FindWindow'), | 298 | ('Windows', 'FindWindow'), |
| 290 | 'May run code from a DLL': | 299 | 'May run code from a DLL': |
| 291 | - #TODO: regex to find declare+lib on same line | 300 | + #TODO: regex to find declare+lib on same line |
| 292 | ('Lib',), | 301 | ('Lib',), |
| 293 | 'May download files from the Internet': | 302 | 'May download files from the Internet': |
| 294 | - #TODO: regex to find urlmon+URLDownloadToFileA on same line | 303 | + #TODO: regex to find urlmon+URLDownloadToFileA on same line |
| 295 | ('URLDownloadToFileA', 'Msxml2.XMLHTTP', 'Microsoft.XMLHTTP'), | 304 | ('URLDownloadToFileA', 'Msxml2.XMLHTTP', 'Microsoft.XMLHTTP'), |
| 296 | 'May download files from the Internet using PowerShell': | 305 | 'May download files from the Internet using PowerShell': |
| 297 | - #sample: https://malwr.com/analysis/M2NjZWNmMjA0YjVjNGVhYmJlZmFhNWY4NmQxZDllZTY/ | 306 | + #sample: https://malwr.com/analysis/M2NjZWNmMjA0YjVjNGVhYmJlZmFhNWY4NmQxZDllZTY/ |
| 298 | ('New-Object System.Net.WebClient', 'DownloadFile'), | 307 | ('New-Object System.Net.WebClient', 'DownloadFile'), |
| 299 | 'May control another application by simulating user keystrokes': | 308 | 'May control another application by simulating user keystrokes': |
| 300 | ('SendKeys', 'AppActivate'), | 309 | ('SendKeys', 'AppActivate'), |
| 301 | - #SendKeys: http://msdn.microsoft.com/en-us/library/office/gg278655%28v=office.15%29.aspx | 310 | + #SendKeys: http://msdn.microsoft.com/en-us/library/office/gg278655%28v=office.15%29.aspx |
| 302 | 'May attempt to obfuscate malicious function calls': | 311 | 'May attempt to obfuscate malicious function calls': |
| 303 | ('CallByName',), | 312 | ('CallByName',), |
| 304 | - #CallByName: http://msdn.microsoft.com/en-us/library/office/gg278760%28v=office.15%29.aspx | 313 | + #CallByName: http://msdn.microsoft.com/en-us/library/office/gg278760%28v=office.15%29.aspx |
| 305 | 'May attempt to obfuscate specific strings': | 314 | 'May attempt to obfuscate specific strings': |
| 306 | - #TODO: regex to find several Chr*, not just one | 315 | + #TODO: regex to find several Chr*, not just one |
| 307 | ('Chr', 'ChrB', 'ChrW', 'StrReverse', 'Xor'), | 316 | ('Chr', 'ChrB', 'ChrW', 'StrReverse', 'Xor'), |
| 308 | - #Chr: http://msdn.microsoft.com/en-us/library/office/gg264465%28v=office.15%29.aspx | 317 | + #Chr: http://msdn.microsoft.com/en-us/library/office/gg264465%28v=office.15%29.aspx |
| 309 | 'May read or write registry keys': | 318 | 'May read or write registry keys': |
| 310 | - #sample: https://malwr.com/analysis/M2NjZWNmMjA0YjVjNGVhYmJlZmFhNWY4NmQxZDllZTY/ | 319 | + #sample: https://malwr.com/analysis/M2NjZWNmMjA0YjVjNGVhYmJlZmFhNWY4NmQxZDllZTY/ |
| 311 | ('RegOpenKeyExA', 'RegOpenKeyEx', 'RegCloseKey'), | 320 | ('RegOpenKeyExA', 'RegOpenKeyEx', 'RegCloseKey'), |
| 312 | 'May read registry keys': | 321 | 'May read registry keys': |
| 313 | - #sample: https://malwr.com/analysis/M2NjZWNmMjA0YjVjNGVhYmJlZmFhNWY4NmQxZDllZTY/ | 322 | + #sample: https://malwr.com/analysis/M2NjZWNmMjA0YjVjNGVhYmJlZmFhNWY4NmQxZDllZTY/ |
| 314 | ('RegQueryValueExA', 'RegQueryValueEx', | 323 | ('RegQueryValueExA', 'RegQueryValueEx', |
| 315 | 'RegRead', #with Wscript.Shell | 324 | 'RegRead', #with Wscript.Shell |
| 316 | - ), | 325 | + ), |
| 317 | 'May detect virtualization': | 326 | 'May detect virtualization': |
| 318 | - # sample: https://malwr.com/analysis/M2NjZWNmMjA0YjVjNGVhYmJlZmFhNWY4NmQxZDllZTY/ | 327 | + # sample: https://malwr.com/analysis/M2NjZWNmMjA0YjVjNGVhYmJlZmFhNWY4NmQxZDllZTY/ |
| 319 | (r'SYSTEM\ControlSet001\Services\Disk\Enum', 'VIRTUAL', 'VMWARE', 'VBOX'), | 328 | (r'SYSTEM\ControlSet001\Services\Disk\Enum', 'VIRTUAL', 'VMWARE', 'VBOX'), |
| 320 | 'May detect Anubis Sandbox': | 329 | 'May detect Anubis Sandbox': |
| 321 | - # sample: https://malwr.com/analysis/M2NjZWNmMjA0YjVjNGVhYmJlZmFhNWY4NmQxZDllZTY/ | ||
| 322 | - # NOTES: this sample also checks App.EXEName but that seems to be a bug, it works in VB6 but not in VBA | ||
| 323 | - # ref: http://www.syssec-project.eu/m/page-media/3/disarm-raid11.pdf | ||
| 324 | - ('GetVolumeInformationA', 'GetVolumeInformation', # with kernel32.dll | 330 | + # sample: https://malwr.com/analysis/M2NjZWNmMjA0YjVjNGVhYmJlZmFhNWY4NmQxZDllZTY/ |
| 331 | + # NOTES: this sample also checks App.EXEName but that seems to be a bug, it works in VB6 but not in VBA | ||
| 332 | + # ref: http://www.syssec-project.eu/m/page-media/3/disarm-raid11.pdf | ||
| 333 | + ('GetVolumeInformationA', 'GetVolumeInformation', # with kernel32.dll | ||
| 325 | '1824245000', r'HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Windows NT\CurrentVersion\ProductId', | 334 | '1824245000', r'HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Windows NT\CurrentVersion\ProductId', |
| 326 | '76487-337-8429955-22614', 'andy', 'sample', r'C:\exec\exec.exe', 'popupkiller' | 335 | '76487-337-8429955-22614', 'andy', 'sample', r'C:\exec\exec.exe', 'popupkiller' |
| 327 | ), | 336 | ), |
| 328 | 'May detect Sandboxie': | 337 | 'May detect Sandboxie': |
| 329 | - # sample: https://malwr.com/analysis/M2NjZWNmMjA0YjVjNGVhYmJlZmFhNWY4NmQxZDllZTY/ | ||
| 330 | - # ref: http://www.cplusplus.com/forum/windows/96874/ | 338 | + # sample: https://malwr.com/analysis/M2NjZWNmMjA0YjVjNGVhYmJlZmFhNWY4NmQxZDllZTY/ |
| 339 | + # ref: http://www.cplusplus.com/forum/windows/96874/ | ||
| 331 | ('SbieDll.dll', 'SandboxieControlWndClass'), | 340 | ('SbieDll.dll', 'SandboxieControlWndClass'), |
| 332 | 'May detect Sunbelt Sandbox': | 341 | 'May detect Sunbelt Sandbox': |
| 333 | - # ref: http://www.cplusplus.com/forum/windows/96874/ | 342 | + # ref: http://www.cplusplus.com/forum/windows/96874/ |
| 334 | (r'C:\file.exe',), | 343 | (r'C:\file.exe',), |
| 335 | 'May detect Norman Sandbox': | 344 | 'May detect Norman Sandbox': |
| 336 | - # ref: http://www.cplusplus.com/forum/windows/96874/ | 345 | + # ref: http://www.cplusplus.com/forum/windows/96874/ |
| 337 | ('currentuser',), | 346 | ('currentuser',), |
| 338 | 'May detect CW Sandbox': | 347 | 'May detect CW Sandbox': |
| 339 | - # ref: http://www.cplusplus.com/forum/windows/96874/ | 348 | + # ref: http://www.cplusplus.com/forum/windows/96874/ |
| 340 | ('Schmidti',), | 349 | ('Schmidti',), |
| 341 | 'May detect WinJail Sandbox': | 350 | 'May detect WinJail Sandbox': |
| 342 | - # ref: http://www.cplusplus.com/forum/windows/96874/ | 351 | + # ref: http://www.cplusplus.com/forum/windows/96874/ |
| 343 | ('Afx:400000:0',), | 352 | ('Afx:400000:0',), |
| 344 | } | 353 | } |
| 345 | 354 | ||
| @@ -355,12 +364,12 @@ DNS_NAME = r'(?:[a-zA-Z0-9\-\.]+\.' + TLD + ')' | @@ -355,12 +364,12 @@ DNS_NAME = r'(?:[a-zA-Z0-9\-\.]+\.' + TLD + ')' | ||
| 355 | #TODO: IPv6 - see https://www.debuggex.com/ | 364 | #TODO: IPv6 - see https://www.debuggex.com/ |
| 356 | # A literal numeric IPv6 address may be given, but must be enclosed in [ ] e.g. [db8:0cec::99:123a] | 365 | # A literal numeric IPv6 address may be given, but must be enclosed in [ ] e.g. [db8:0cec::99:123a] |
| 357 | NUMBER_0_255 = r'(?:25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9][0-9]|[0-9])' | 366 | NUMBER_0_255 = r'(?:25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9][0-9]|[0-9])' |
| 358 | -IPv4 = r'(?:'+NUMBER_0_255+r'\.){3}'+NUMBER_0_255 | 367 | +IPv4 = r'(?:' + NUMBER_0_255 + r'\.){3}' + NUMBER_0_255 |
| 359 | # IPv4 must come before the DNS name because it is more specific | 368 | # IPv4 must come before the DNS name because it is more specific |
| 360 | SERVER = r'(?:' + IPv4 + '|' + DNS_NAME + ')' | 369 | SERVER = r'(?:' + IPv4 + '|' + DNS_NAME + ')' |
| 361 | PORT = r'(?:\:[0-9]{1,5})?' | 370 | PORT = r'(?:\:[0-9]{1,5})?' |
| 362 | SERVER_PORT = SERVER + PORT | 371 | SERVER_PORT = SERVER + PORT |
| 363 | -URL_PATH = r'(?:/[a-zA-Z0-9\-\._\?\,\'/\\\+&%\$#\=~]*)?' # [^\.\,\)\(\s"] | 372 | +URL_PATH = r'(?:/[a-zA-Z0-9\-\._\?\,\'/\\\+&%\$#\=~]*)?' # [^\.\,\)\(\s"] |
| 364 | URL_RE = SCHEME + r'\://' + SERVER_PORT + URL_PATH | 373 | URL_RE = SCHEME + r'\://' + SERVER_PORT + URL_PATH |
| 365 | re_url = re.compile(URL_RE) | 374 | re_url = re.compile(URL_RE) |
| 366 | 375 | ||
| @@ -370,14 +379,15 @@ re_url = re.compile(URL_RE) | @@ -370,14 +379,15 @@ re_url = re.compile(URL_RE) | ||
| 370 | RE_PATTERNS = ( | 379 | RE_PATTERNS = ( |
| 371 | ('URL', re.compile(URL_RE)), | 380 | ('URL', re.compile(URL_RE)), |
| 372 | ('IPv4 address', re.compile(IPv4)), | 381 | ('IPv4 address', re.compile(IPv4)), |
| 373 | - ('E-mail address', re.compile(r'(?i)\b[A-Z0-9._%+-]+@'+SERVER+'\b')), | 382 | + ('E-mail address', re.compile(r'(?i)\b[A-Z0-9._%+-]+@' + SERVER + '\b')), |
| 374 | # ('Domain name', re.compile(r'(?=^.{1,254}$)(^(?:(?!\d+\.|-)[a-zA-Z0-9_\-]{1,63}(?<!-)\.?)+(?:[a-zA-Z]{2,})$)')), | 383 | # ('Domain name', re.compile(r'(?=^.{1,254}$)(^(?:(?!\d+\.|-)[a-zA-Z0-9_\-]{1,63}(?<!-)\.?)+(?:[a-zA-Z]{2,})$)')), |
| 375 | # Executable file name with known extensions (except .com which is present in many URLs, and .application): | 384 | # Executable file name with known extensions (except .com which is present in many URLs, and .application): |
| 376 | - ("Executable file name", re.compile(r"(?i)\b\w+\.(EXE|PIF|GADGET|MSI|MSP|MSC|VBS|VBE|VB|JSE|JS|WSF|WSC|WSH|WS|BAT|CMD|DLL|SCR|HTA|CPL|CLASS|JAR|PS1XML|PS1|PS2XML|PS2|PSC1|PSC2|SCF|LNK|INF|REG)\b")), | 385 | + ("Executable file name", re.compile( |
| 386 | + r"(?i)\b\w+\.(EXE|PIF|GADGET|MSI|MSP|MSC|VBS|VBE|VB|JSE|JS|WSF|WSC|WSH|WS|BAT|CMD|DLL|SCR|HTA|CPL|CLASS|JAR|PS1XML|PS1|PS2XML|PS2|PSC1|PSC2|SCF|LNK|INF|REG)\b")), | ||
| 377 | # Sources: http://www.howtogeek.com/137270/50-file-extensions-that-are-potentially-dangerous-on-windows/ | 387 | # Sources: http://www.howtogeek.com/137270/50-file-extensions-that-are-potentially-dangerous-on-windows/ |
| 378 | #TODO: https://support.office.com/en-us/article/Blocked-attachments-in-Outlook-3811cddc-17c3-4279-a30c-060ba0207372#__attachment_file_types | 388 | #TODO: https://support.office.com/en-us/article/Blocked-attachments-in-Outlook-3811cddc-17c3-4279-a30c-060ba0207372#__attachment_file_types |
| 379 | #('Hex string', re.compile(r'(?:[0-9A-Fa-f]{2}){4,}')), | 389 | #('Hex string', re.compile(r'(?:[0-9A-Fa-f]{2}){4,}')), |
| 380 | - ) | 390 | +) |
| 381 | 391 | ||
| 382 | # regex to detect strings encoded in hexadecimal | 392 | # regex to detect strings encoded in hexadecimal |
| 383 | re_hex_string = re.compile(r'(?:[0-9A-Fa-f]{2}){4,}') | 393 | re_hex_string = re.compile(r'(?:[0-9A-Fa-f]{2}){4,}') |
| @@ -385,7 +395,8 @@ re_hex_string = re.compile(r'(?:[0-9A-Fa-f]{2}){4,}') | @@ -385,7 +395,8 @@ re_hex_string = re.compile(r'(?:[0-9A-Fa-f]{2}){4,}') | ||
| 385 | # regex to detect strings encoded in base64 | 395 | # regex to detect strings encoded in base64 |
| 386 | #re_base64_string = re.compile(r'"(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)?"') | 396 | #re_base64_string = re.compile(r'"(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)?"') |
| 387 | # better version from balbuzard, less false positives: | 397 | # better version from balbuzard, less false positives: |
| 388 | -re_base64_string = re.compile(r'"(?:[A-Za-z0-9+/]{4}){1,}(?:[A-Za-z0-9+/]{2}[AEIMQUYcgkosw048]=|[A-Za-z0-9+/][AQgw]==)?"') | 398 | +re_base64_string = re.compile( |
| 399 | + r'"(?:[A-Za-z0-9+/]{4}){1,}(?:[A-Za-z0-9+/]{2}[AEIMQUYcgkosw048]=|[A-Za-z0-9+/][AQgw]==)?"') | ||
| 389 | # white list of common strings matching the base64 regex, but which are not base64 strings (all lowercase): | 400 | # white list of common strings matching the base64 regex, but which are not base64 strings (all lowercase): |
| 390 | BASE64_WHITELIST = set(['thisdocument', 'thisworkbook', 'test', 'temp', 'http', 'open', 'exit']) | 401 | BASE64_WHITELIST = set(['thisdocument', 'thisworkbook', 'test', 'temp', 'http', 'open', 'exit']) |
| 391 | 402 | ||
| @@ -414,7 +425,7 @@ def copytoken_help(decompressed_current, decompressed_chunk_start): | @@ -414,7 +425,7 @@ def copytoken_help(decompressed_current, decompressed_chunk_start): | ||
| 414 | return length_mask, offset_mask, bit_count, maximum_length | 425 | return length_mask, offset_mask, bit_count, maximum_length |
| 415 | 426 | ||
| 416 | 427 | ||
| 417 | -def decompress_stream (compressed_container): | 428 | +def decompress_stream(compressed_container): |
| 418 | """ | 429 | """ |
| 419 | Decompress a stream according to MS-OVBA section 2.4.1 | 430 | Decompress a stream according to MS-OVBA section 2.4.1 |
| 420 | 431 | ||
| @@ -456,7 +467,8 @@ def decompress_stream (compressed_container): | @@ -456,7 +467,8 @@ def decompress_stream (compressed_container): | ||
| 456 | # 2.4.1.1.5 | 467 | # 2.4.1.1.5 |
| 457 | compressed_chunk_start = compressed_current | 468 | compressed_chunk_start = compressed_current |
| 458 | # chunk header = first 16 bits | 469 | # chunk header = first 16 bits |
| 459 | - compressed_chunk_header = struct.unpack("<H", compressed_container[compressed_chunk_start:compressed_chunk_start + 2])[0] | 470 | + compressed_chunk_header = \ |
| 471 | + struct.unpack("<H", compressed_container[compressed_chunk_start:compressed_chunk_start + 2])[0] | ||
| 460 | # chunk size = 12 first bits of header + 3 | 472 | # chunk size = 12 first bits of header + 3 |
| 461 | chunk_size = (compressed_chunk_header & 0x0FFF) + 3 | 473 | chunk_size = (compressed_chunk_header & 0x0FFF) + 3 |
| 462 | # chunk signature = 3 next bits - should always be 0b011 | 474 | # chunk signature = 3 next bits - should always be 0b011 |
| @@ -510,13 +522,14 @@ def decompress_stream (compressed_container): | @@ -510,13 +522,14 @@ def decompress_stream (compressed_container): | ||
| 510 | # MS-OVBA 2.4.1.3.17 Extract FlagBit | 522 | # MS-OVBA 2.4.1.3.17 Extract FlagBit |
| 511 | flag_bit = (flag_byte >> bit_index) & 1 | 523 | flag_bit = (flag_byte >> bit_index) & 1 |
| 512 | #logging.debug('bit_index=%d: flag_bit=%d' % (bit_index, flag_bit)) | 524 | #logging.debug('bit_index=%d: flag_bit=%d' % (bit_index, flag_bit)) |
| 513 | - if flag_bit == 0: # LiteralToken | 525 | + if flag_bit == 0: # LiteralToken |
| 514 | # copy one byte directly to output | 526 | # copy one byte directly to output |
| 515 | decompressed_container += compressed_container[compressed_current] | 527 | decompressed_container += compressed_container[compressed_current] |
| 516 | compressed_current += 1 | 528 | compressed_current += 1 |
| 517 | - else: # CopyToken | 529 | + else: # CopyToken |
| 518 | # MS-OVBA 2.4.1.3.19.2 Unpack CopyToken | 530 | # MS-OVBA 2.4.1.3.19.2 Unpack CopyToken |
| 519 | - copy_token = struct.unpack("<H", compressed_container[compressed_current:compressed_current + 2])[0] | 531 | + copy_token = \ |
| 532 | + struct.unpack("<H", compressed_container[compressed_current:compressed_current + 2])[0] | ||
| 520 | #TODO: check this | 533 | #TODO: check this |
| 521 | length_mask, offset_mask, bit_count, maximum_length = copytoken_help( | 534 | length_mask, offset_mask, bit_count, maximum_length = copytoken_help( |
| 522 | len(decompressed_container), decompressed_chunk_start) | 535 | len(decompressed_container), decompressed_chunk_start) |
| @@ -532,7 +545,7 @@ def decompress_stream (compressed_container): | @@ -532,7 +545,7 @@ def decompress_stream (compressed_container): | ||
| 532 | return decompressed_container | 545 | return decompressed_container |
| 533 | 546 | ||
| 534 | 547 | ||
| 535 | -def _extract_vba (ole, vba_root, project_path, dir_path): | 548 | +def _extract_vba(ole, vba_root, project_path, dir_path): |
| 536 | """ | 549 | """ |
| 537 | Extract VBA macros from an OleFileIO object. | 550 | Extract VBA macros from an OleFileIO object. |
| 538 | Internal function, do not call directly. | 551 | Internal function, do not call directly. |
| @@ -649,7 +662,8 @@ def _extract_vba (ole, vba_root, project_path, dir_path): | @@ -649,7 +662,8 @@ def _extract_vba (ole, vba_root, project_path, dir_path): | ||
| 649 | check_value('PROJECTDOCSTRING_Id', 0x0005, PROJECTDOCSTRING_Id) | 662 | check_value('PROJECTDOCSTRING_Id', 0x0005, PROJECTDOCSTRING_Id) |
| 650 | PROJECTDOCSTRING_SizeOfDocString = struct.unpack("<L", dir_stream.read(4))[0] | 663 | PROJECTDOCSTRING_SizeOfDocString = struct.unpack("<L", dir_stream.read(4))[0] |
| 651 | if PROJECTNAME_SizeOfProjectName > 2000: | 664 | if PROJECTNAME_SizeOfProjectName > 2000: |
| 652 | - logging.error("PROJECTDOCSTRING_SizeOfDocString value not in range: {0}".format(PROJECTDOCSTRING_SizeOfDocString)) | 665 | + logging.error( |
| 666 | + "PROJECTDOCSTRING_SizeOfDocString value not in range: {0}".format(PROJECTDOCSTRING_SizeOfDocString)) | ||
| 653 | PROJECTDOCSTRING_DocString = dir_stream.read(PROJECTDOCSTRING_SizeOfDocString) | 667 | PROJECTDOCSTRING_DocString = dir_stream.read(PROJECTDOCSTRING_SizeOfDocString) |
| 654 | PROJECTDOCSTRING_Reserved = struct.unpack("<H", dir_stream.read(2))[0] | 668 | PROJECTDOCSTRING_Reserved = struct.unpack("<H", dir_stream.read(2))[0] |
| 655 | check_value('PROJECTDOCSTRING_Reserved', 0x0040, PROJECTDOCSTRING_Reserved) | 669 | check_value('PROJECTDOCSTRING_Reserved', 0x0040, PROJECTDOCSTRING_Reserved) |
| @@ -663,7 +677,8 @@ def _extract_vba (ole, vba_root, project_path, dir_path): | @@ -663,7 +677,8 @@ def _extract_vba (ole, vba_root, project_path, dir_path): | ||
| 663 | check_value('PROJECTHELPFILEPATH_Id', 0x0006, PROJECTHELPFILEPATH_Id) | 677 | check_value('PROJECTHELPFILEPATH_Id', 0x0006, PROJECTHELPFILEPATH_Id) |
| 664 | PROJECTHELPFILEPATH_SizeOfHelpFile1 = struct.unpack("<L", dir_stream.read(4))[0] | 678 | PROJECTHELPFILEPATH_SizeOfHelpFile1 = struct.unpack("<L", dir_stream.read(4))[0] |
| 665 | if PROJECTHELPFILEPATH_SizeOfHelpFile1 > 260: | 679 | if PROJECTHELPFILEPATH_SizeOfHelpFile1 > 260: |
| 666 | - logging.error("PROJECTHELPFILEPATH_SizeOfHelpFile1 value not in range: {0}".format(PROJECTHELPFILEPATH_SizeOfHelpFile1)) | 680 | + logging.error( |
| 681 | + "PROJECTHELPFILEPATH_SizeOfHelpFile1 value not in range: {0}".format(PROJECTHELPFILEPATH_SizeOfHelpFile1)) | ||
| 667 | PROJECTHELPFILEPATH_HelpFile1 = dir_stream.read(PROJECTHELPFILEPATH_SizeOfHelpFile1) | 682 | PROJECTHELPFILEPATH_HelpFile1 = dir_stream.read(PROJECTHELPFILEPATH_SizeOfHelpFile1) |
| 668 | PROJECTHELPFILEPATH_Reserved = struct.unpack("<H", dir_stream.read(2))[0] | 683 | PROJECTHELPFILEPATH_Reserved = struct.unpack("<H", dir_stream.read(2))[0] |
| 669 | check_value('PROJECTHELPFILEPATH_Reserved', 0x003D, PROJECTHELPFILEPATH_Reserved) | 684 | check_value('PROJECTHELPFILEPATH_Reserved', 0x003D, PROJECTHELPFILEPATH_Reserved) |
| @@ -702,7 +717,8 @@ def _extract_vba (ole, vba_root, project_path, dir_path): | @@ -702,7 +717,8 @@ def _extract_vba (ole, vba_root, project_path, dir_path): | ||
| 702 | check_value('PROJECTCONSTANTS_Id', 0x000C, PROJECTCONSTANTS_Id) | 717 | check_value('PROJECTCONSTANTS_Id', 0x000C, PROJECTCONSTANTS_Id) |
| 703 | PROJECTCONSTANTS_SizeOfConstants = struct.unpack("<L", dir_stream.read(4))[0] | 718 | PROJECTCONSTANTS_SizeOfConstants = struct.unpack("<L", dir_stream.read(4))[0] |
| 704 | if PROJECTCONSTANTS_SizeOfConstants > 1015: | 719 | if PROJECTCONSTANTS_SizeOfConstants > 1015: |
| 705 | - logging.error("PROJECTCONSTANTS_SizeOfConstants value not in range: {0}".format(PROJECTCONSTANTS_SizeOfConstants)) | 720 | + logging.error( |
| 721 | + "PROJECTCONSTANTS_SizeOfConstants value not in range: {0}".format(PROJECTCONSTANTS_SizeOfConstants)) | ||
| 706 | PROJECTCONSTANTS_Constants = dir_stream.read(PROJECTCONSTANTS_SizeOfConstants) | 722 | PROJECTCONSTANTS_Constants = dir_stream.read(PROJECTCONSTANTS_SizeOfConstants) |
| 707 | PROJECTCONSTANTS_Reserved = struct.unpack("<H", dir_stream.read(2))[0] | 723 | PROJECTCONSTANTS_Reserved = struct.unpack("<H", dir_stream.read(2))[0] |
| 708 | check_value('PROJECTCONSTANTS_Reserved', 0x003C, PROJECTCONSTANTS_Reserved) | 724 | check_value('PROJECTCONSTANTS_Reserved', 0x003C, PROJECTCONSTANTS_Reserved) |
| @@ -740,23 +756,26 @@ def _extract_vba (ole, vba_root, project_path, dir_path): | @@ -740,23 +756,26 @@ def _extract_vba (ole, vba_root, project_path, dir_path): | ||
| 740 | if check == 0x002F: | 756 | if check == 0x002F: |
| 741 | # REFERENCECONTROL | 757 | # REFERENCECONTROL |
| 742 | REFERENCECONTROL_Id = check | 758 | REFERENCECONTROL_Id = check |
| 743 | - REFERENCECONTROL_SizeTwiddled = struct.unpack("<L", dir_stream.read(4))[0] # ignore | 759 | + REFERENCECONTROL_SizeTwiddled = struct.unpack("<L", dir_stream.read(4))[0] # ignore |
| 744 | REFERENCECONTROL_SizeOfLibidTwiddled = struct.unpack("<L", dir_stream.read(4))[0] | 760 | REFERENCECONTROL_SizeOfLibidTwiddled = struct.unpack("<L", dir_stream.read(4))[0] |
| 745 | REFERENCECONTROL_LibidTwiddled = dir_stream.read(REFERENCECONTROL_SizeOfLibidTwiddled) | 761 | REFERENCECONTROL_LibidTwiddled = dir_stream.read(REFERENCECONTROL_SizeOfLibidTwiddled) |
| 746 | - REFERENCECONTROL_Reserved1 = struct.unpack("<L", dir_stream.read(4))[0] # ignore | 762 | + REFERENCECONTROL_Reserved1 = struct.unpack("<L", dir_stream.read(4))[0] # ignore |
| 747 | check_value('REFERENCECONTROL_Reserved1', 0x0000, REFERENCECONTROL_Reserved1) | 763 | check_value('REFERENCECONTROL_Reserved1', 0x0000, REFERENCECONTROL_Reserved1) |
| 748 | - REFERENCECONTROL_Reserved2 = struct.unpack("<H", dir_stream.read(2))[0] # ignore | 764 | + REFERENCECONTROL_Reserved2 = struct.unpack("<H", dir_stream.read(2))[0] # ignore |
| 749 | check_value('REFERENCECONTROL_Reserved2', 0x0000, REFERENCECONTROL_Reserved2) | 765 | check_value('REFERENCECONTROL_Reserved2', 0x0000, REFERENCECONTROL_Reserved2) |
| 750 | # optional field | 766 | # optional field |
| 751 | check2 = struct.unpack("<H", dir_stream.read(2))[0] | 767 | check2 = struct.unpack("<H", dir_stream.read(2))[0] |
| 752 | if check2 == 0x0016: | 768 | if check2 == 0x0016: |
| 753 | REFERENCECONTROL_NameRecordExtended_Id = check | 769 | REFERENCECONTROL_NameRecordExtended_Id = check |
| 754 | REFERENCECONTROL_NameRecordExtended_SizeofName = struct.unpack("<L", dir_stream.read(4))[0] | 770 | REFERENCECONTROL_NameRecordExtended_SizeofName = struct.unpack("<L", dir_stream.read(4))[0] |
| 755 | - REFERENCECONTROL_NameRecordExtended_Name = dir_stream.read(REFERENCECONTROL_NameRecordExtended_SizeofName) | 771 | + REFERENCECONTROL_NameRecordExtended_Name = dir_stream.read( |
| 772 | + REFERENCECONTROL_NameRecordExtended_SizeofName) | ||
| 756 | REFERENCECONTROL_NameRecordExtended_Reserved = struct.unpack("<H", dir_stream.read(2))[0] | 773 | REFERENCECONTROL_NameRecordExtended_Reserved = struct.unpack("<H", dir_stream.read(2))[0] |
| 757 | - check_value('REFERENCECONTROL_NameRecordExtended_Reserved', 0x003E, REFERENCECONTROL_NameRecordExtended_Reserved) | 774 | + check_value('REFERENCECONTROL_NameRecordExtended_Reserved', 0x003E, |
| 775 | + REFERENCECONTROL_NameRecordExtended_Reserved) | ||
| 758 | REFERENCECONTROL_NameRecordExtended_SizeOfNameUnicode = struct.unpack("<L", dir_stream.read(4))[0] | 776 | REFERENCECONTROL_NameRecordExtended_SizeOfNameUnicode = struct.unpack("<L", dir_stream.read(4))[0] |
| 759 | - REFERENCECONTROL_NameRecordExtended_NameUnicode = dir_stream.read(REFERENCECONTROL_NameRecordExtended_SizeOfNameUnicode) | 777 | + REFERENCECONTROL_NameRecordExtended_NameUnicode = dir_stream.read( |
| 778 | + REFERENCECONTROL_NameRecordExtended_SizeOfNameUnicode) | ||
| 760 | REFERENCECONTROL_Reserved3 = struct.unpack("<H", dir_stream.read(2))[0] | 779 | REFERENCECONTROL_Reserved3 = struct.unpack("<H", dir_stream.read(2))[0] |
| 761 | else: | 780 | else: |
| 762 | REFERENCECONTROL_Reserved3 = check2 | 781 | REFERENCECONTROL_Reserved3 = check2 |
| @@ -798,7 +817,7 @@ def _extract_vba (ole, vba_root, project_path, dir_path): | @@ -798,7 +817,7 @@ def _extract_vba (ole, vba_root, project_path, dir_path): | ||
| 798 | logging.error('invalid or unknown check Id {0:04X}'.format(check)) | 817 | logging.error('invalid or unknown check Id {0:04X}'.format(check)) |
| 799 | sys.exit(0) | 818 | sys.exit(0) |
| 800 | 819 | ||
| 801 | - PROJECTMODULES_Id = check #struct.unpack("<H", dir_stream.read(2))[0] | 820 | + PROJECTMODULES_Id = check #struct.unpack("<H", dir_stream.read(2))[0] |
| 802 | check_value('PROJECTMODULES_Id', 0x000F, PROJECTMODULES_Id) | 821 | check_value('PROJECTMODULES_Id', 0x000F, PROJECTMODULES_Id) |
| 803 | PROJECTMODULES_Size = struct.unpack("<L", dir_stream.read(4))[0] | 822 | PROJECTMODULES_Size = struct.unpack("<L", dir_stream.read(4))[0] |
| 804 | check_value('PROJECTMODULES_Size', 0x0002, PROJECTMODULES_Size) | 823 | check_value('PROJECTMODULES_Size', 0x0002, PROJECTMODULES_Size) |
| @@ -878,7 +897,7 @@ def _extract_vba (ole, vba_root, project_path, dir_path): | @@ -878,7 +897,7 @@ def _extract_vba (ole, vba_root, project_path, dir_path): | ||
| 878 | MODULEPRIVATE_Reserved = struct.unpack("<L", dir_stream.read(4))[0] | 897 | MODULEPRIVATE_Reserved = struct.unpack("<L", dir_stream.read(4))[0] |
| 879 | check_value('MODULEPRIVATE_Reserved', 0x0000, MODULEPRIVATE_Reserved) | 898 | check_value('MODULEPRIVATE_Reserved', 0x0000, MODULEPRIVATE_Reserved) |
| 880 | section_id = struct.unpack("<H", dir_stream.read(2))[0] | 899 | section_id = struct.unpack("<H", dir_stream.read(2))[0] |
| 881 | - if section_id == 0x002B: # TERMINATOR | 900 | + if section_id == 0x002B: # TERMINATOR |
| 882 | MODULE_Reserved = struct.unpack("<L", dir_stream.read(4))[0] | 901 | MODULE_Reserved = struct.unpack("<L", dir_stream.read(4))[0] |
| 883 | check_value('MODULE_Reserved', 0x0000, MODULE_Reserved) | 902 | check_value('MODULE_Reserved', 0x0000, MODULE_Reserved) |
| 884 | section_id = None | 903 | section_id = None |
| @@ -964,9 +983,9 @@ def detect_autoexec(vba_code, obfuscation=None): | @@ -964,9 +983,9 @@ def detect_autoexec(vba_code, obfuscation=None): | ||
| 964 | for keyword in keywords: | 983 | for keyword in keywords: |
| 965 | #TODO: if keyword is already a compiled regex, use it as-is | 984 | #TODO: if keyword is already a compiled regex, use it as-is |
| 966 | # search using regex to detect word boundaries: | 985 | # search using regex to detect word boundaries: |
| 967 | - if re.search(r'(?i)\b'+keyword+r'\b', vba_code): | ||
| 968 | - #if keyword.lower() in vba_code: | ||
| 969 | - results.append((keyword, description+obf_text)) | 986 | + if re.search(r'(?i)\b' + keyword + r'\b', vba_code): |
| 987 | + #if keyword.lower() in vba_code: | ||
| 988 | + results.append((keyword, description + obf_text)) | ||
| 970 | return results | 989 | return results |
| 971 | 990 | ||
| 972 | 991 | ||
| @@ -988,9 +1007,9 @@ def detect_suspicious(vba_code, obfuscation=None): | @@ -988,9 +1007,9 @@ def detect_suspicious(vba_code, obfuscation=None): | ||
| 988 | for description, keywords in SUSPICIOUS_KEYWORDS.items(): | 1007 | for description, keywords in SUSPICIOUS_KEYWORDS.items(): |
| 989 | for keyword in keywords: | 1008 | for keyword in keywords: |
| 990 | # search using regex to detect word boundaries: | 1009 | # search using regex to detect word boundaries: |
| 991 | - if re.search(r'(?i)\b'+keyword+r'\b', vba_code): | ||
| 992 | - #if keyword.lower() in vba_code: | ||
| 993 | - results.append((keyword, description+obf_text)) | 1010 | + if re.search(r'(?i)\b' + keyword + r'\b', vba_code): |
| 1011 | + #if keyword.lower() in vba_code: | ||
| 1012 | + results.append((keyword, description + obf_text)) | ||
| 994 | return results | 1013 | return results |
| 995 | 1014 | ||
| 996 | 1015 | ||
| @@ -1011,7 +1030,7 @@ def detect_patterns(vba_code, obfuscation=None): | @@ -1011,7 +1030,7 @@ def detect_patterns(vba_code, obfuscation=None): | ||
| 1011 | for match in pattern_re.finditer(vba_code): | 1030 | for match in pattern_re.finditer(vba_code): |
| 1012 | value = match.group() | 1031 | value = match.group() |
| 1013 | if value not in found: | 1032 | if value not in found: |
| 1014 | - results.append((pattern_type+obf_text, value)) | 1033 | + results.append((pattern_type + obf_text, value)) |
| 1015 | found.add(value) | 1034 | found.add(value) |
| 1016 | return results | 1035 | return results |
| 1017 | 1036 | ||
| @@ -1070,6 +1089,7 @@ def detect_dridex_strings(vba_code): | @@ -1070,6 +1089,7 @@ def detect_dridex_strings(vba_code): | ||
| 1070 | :return: list of str tuples (encoded string, decoded string) | 1089 | :return: list of str tuples (encoded string, decoded string) |
| 1071 | """ | 1090 | """ |
| 1072 | from thirdparty.DridexUrlDecoder.DridexUrlDecoder import DridexUrlDecode | 1091 | from thirdparty.DridexUrlDecoder.DridexUrlDecoder import DridexUrlDecode |
| 1092 | + | ||
| 1073 | results = [] | 1093 | results = [] |
| 1074 | found = set() | 1094 | found = set() |
| 1075 | for match in re_dridex_string.finditer(vba_code): | 1095 | for match in re_dridex_string.finditer(vba_code): |
| @@ -1088,7 +1108,7 @@ def detect_dridex_strings(vba_code): | @@ -1088,7 +1108,7 @@ def detect_dridex_strings(vba_code): | ||
| 1088 | return results | 1108 | return results |
| 1089 | 1109 | ||
| 1090 | 1110 | ||
| 1091 | -class VBA_Scanner (object): | 1111 | +class VBA_Scanner(object): |
| 1092 | """ | 1112 | """ |
| 1093 | Class to scan the source code of a VBA module to find obfuscated strings, | 1113 | Class to scan the source code of a VBA module to find obfuscated strings, |
| 1094 | suspicious keywords, IOCs, auto-executable macros, etc. | 1114 | suspicious keywords, IOCs, auto-executable macros, etc. |
| @@ -1125,35 +1145,35 @@ class VBA_Scanner (object): | @@ -1125,35 +1145,35 @@ class VBA_Scanner (object): | ||
| 1125 | if 'strreverse' in self.code.lower(): self.strReverse = True | 1145 | if 'strreverse' in self.code.lower(): self.strReverse = True |
| 1126 | # Then append the decoded strings to the VBA code, to detect obfuscated IOCs and keywords: | 1146 | # Then append the decoded strings to the VBA code, to detect obfuscated IOCs and keywords: |
| 1127 | for encoded, decoded in self.hex_strings: | 1147 | for encoded, decoded in self.hex_strings: |
| 1128 | - self.code_hex += '\n'+decoded | 1148 | + self.code_hex += '\n' + decoded |
| 1129 | # if the code contains "StrReverse", also append the hex strings in reverse order: | 1149 | # if the code contains "StrReverse", also append the hex strings in reverse order: |
| 1130 | if self.strReverse: | 1150 | if self.strReverse: |
| 1131 | # StrReverse after hex decoding: | 1151 | # StrReverse after hex decoding: |
| 1132 | - self.code_hex_rev += '\n'+decoded[::-1] | 1152 | + self.code_hex_rev += '\n' + decoded[::-1] |
| 1133 | # StrReverse before hex decoding: | 1153 | # StrReverse before hex decoding: |
| 1134 | - self.code_rev_hex += '\n'+binascii.unhexlify(encoded[::-1]) | 1154 | + self.code_rev_hex += '\n' + binascii.unhexlify(encoded[::-1]) |
| 1135 | #example: https://malwr.com/analysis/NmFlMGI4YTY1YzYyNDkwNTg1ZTBiZmY5OGI3YjlhYzU/ | 1155 | #example: https://malwr.com/analysis/NmFlMGI4YTY1YzYyNDkwNTg1ZTBiZmY5OGI3YjlhYzU/ |
| 1136 | #TODO: also append the full code reversed if StrReverse? (risk of false positives?) | 1156 | #TODO: also append the full code reversed if StrReverse? (risk of false positives?) |
| 1137 | # Detect Base64-encoded strings | 1157 | # Detect Base64-encoded strings |
| 1138 | self.base64_strings = detect_base64_strings(self.code) | 1158 | self.base64_strings = detect_base64_strings(self.code) |
| 1139 | for encoded, decoded in self.base64_strings: | 1159 | for encoded, decoded in self.base64_strings: |
| 1140 | - self.code_base64 += '\n'+decoded | 1160 | + self.code_base64 += '\n' + decoded |
| 1141 | # Detect Dridex-encoded strings | 1161 | # Detect Dridex-encoded strings |
| 1142 | self.dridex_strings = detect_dridex_strings(self.code) | 1162 | self.dridex_strings = detect_dridex_strings(self.code) |
| 1143 | for encoded, decoded in self.dridex_strings: | 1163 | for encoded, decoded in self.dridex_strings: |
| 1144 | - self.code_dridex += '\n'+decoded | 1164 | + self.code_dridex += '\n' + decoded |
| 1145 | results = [] | 1165 | results = [] |
| 1146 | self.autoexec_keywords = [] | 1166 | self.autoexec_keywords = [] |
| 1147 | self.suspicious_keywords = [] | 1167 | self.suspicious_keywords = [] |
| 1148 | self.iocs = [] | 1168 | self.iocs = [] |
| 1149 | 1169 | ||
| 1150 | for code, obfuscation in ( | 1170 | for code, obfuscation in ( |
| 1151 | - (self.code, None), | ||
| 1152 | - (self.code_hex, 'Hex'), | ||
| 1153 | - (self.code_hex_rev, 'Hex+StrReverse'), | ||
| 1154 | - (self.code_rev_hex, 'StrReverse+Hex'), | ||
| 1155 | - (self.code_base64, 'Base64'), | ||
| 1156 | - (self.code_dridex, 'Dridex'), | 1171 | + (self.code, None), |
| 1172 | + (self.code_hex, 'Hex'), | ||
| 1173 | + (self.code_hex_rev, 'Hex+StrReverse'), | ||
| 1174 | + (self.code_rev_hex, 'StrReverse+Hex'), | ||
| 1175 | + (self.code_base64, 'Base64'), | ||
| 1176 | + (self.code_dridex, 'Dridex'), | ||
| 1157 | ): | 1177 | ): |
| 1158 | self.autoexec_keywords += detect_autoexec(code, obfuscation) | 1178 | self.autoexec_keywords += detect_autoexec(code, obfuscation) |
| 1159 | self.suspicious_keywords += detect_suspicious(code, obfuscation) | 1179 | self.suspicious_keywords += detect_suspicious(code, obfuscation) |
| @@ -1162,13 +1182,13 @@ class VBA_Scanner (object): | @@ -1162,13 +1182,13 @@ class VBA_Scanner (object): | ||
| 1162 | # If hex-encoded strings were discovered, add an item to suspicious keywords: | 1182 | # If hex-encoded strings were discovered, add an item to suspicious keywords: |
| 1163 | if self.hex_strings: | 1183 | if self.hex_strings: |
| 1164 | self.suspicious_keywords.append(('Hex Strings', | 1184 | self.suspicious_keywords.append(('Hex Strings', |
| 1165 | - 'Hex-encoded strings were detected, may be used to obfuscate strings (option --decode to see all)')) | 1185 | + 'Hex-encoded strings were detected, may be used to obfuscate strings (option --decode to see all)')) |
| 1166 | if self.base64_strings: | 1186 | if self.base64_strings: |
| 1167 | self.suspicious_keywords.append(('Base64 Strings', | 1187 | self.suspicious_keywords.append(('Base64 Strings', |
| 1168 | - 'Base64-encoded strings were detected, may be used to obfuscate strings (option --decode to see all)')) | 1188 | + 'Base64-encoded strings were detected, may be used to obfuscate strings (option --decode to see all)')) |
| 1169 | if self.dridex_strings: | 1189 | if self.dridex_strings: |
| 1170 | self.suspicious_keywords.append(('Dridex Strings', | 1190 | self.suspicious_keywords.append(('Dridex Strings', |
| 1171 | - 'Dridex-encoded strings were detected, may be used to obfuscate strings (option --decode to see all)')) | 1191 | + 'Dridex-encoded strings were detected, may be used to obfuscate strings (option --decode to see all)')) |
| 1172 | for keyword, description in self.autoexec_keywords: | 1192 | for keyword, description in self.autoexec_keywords: |
| 1173 | results.append(('AutoExec', keyword, description)) | 1193 | results.append(('AutoExec', keyword, description)) |
| 1174 | for keyword, description in self.suspicious_keywords: | 1194 | for keyword, description in self.suspicious_keywords: |
| @@ -1195,9 +1215,8 @@ class VBA_Scanner (object): | @@ -1195,9 +1215,8 @@ class VBA_Scanner (object): | ||
| 1195 | """ | 1215 | """ |
| 1196 | self.scan() | 1216 | self.scan() |
| 1197 | return (len(self.autoexec_keywords), len(self.suspicious_keywords), | 1217 | return (len(self.autoexec_keywords), len(self.suspicious_keywords), |
| 1198 | - len(self.iocs), len(self.hex_strings), len(self.base64_strings), | ||
| 1199 | - len(self.dridex_strings)) | ||
| 1200 | - | 1218 | + len(self.iocs), len(self.hex_strings), len(self.base64_strings), |
| 1219 | + len(self.dridex_strings)) | ||
| 1201 | 1220 | ||
| 1202 | 1221 | ||
| 1203 | def scan_vba(vba_code, include_decoded_strings): | 1222 | def scan_vba(vba_code, include_decoded_strings): |
| @@ -1265,11 +1284,12 @@ class VBA_Parser(object): | @@ -1265,11 +1284,12 @@ class VBA_Parser(object): | ||
| 1265 | # self.filename = '<file-like object>' | 1284 | # self.filename = '<file-like object>' |
| 1266 | if olefile.isOleFile(_file): | 1285 | if olefile.isOleFile(_file): |
| 1267 | # This looks like an OLE file | 1286 | # This looks like an OLE file |
| 1268 | - logging.info('Parsing OLE file %s' % self.filename) | 1287 | + logging.info('Opening OLE file %s' % self.filename) |
| 1269 | # Open and parse the OLE file, using unicode for path names: | 1288 | # Open and parse the OLE file, using unicode for path names: |
| 1270 | - self.ole_file = olefile.OleFileIO(_file, path_encoding=None) | ||
| 1271 | self.type = TYPE_OLE | 1289 | self.type = TYPE_OLE |
| 1272 | - #TODO: raise TypeError if this is a Powerpoint 97 file, since VBA macros cannot be detected yet | 1290 | + # TODO: handle OLE parsing exceptions |
| 1291 | + self.ole_file = olefile.OleFileIO(_file, path_encoding=None) | ||
| 1292 | + # TODO: raise TypeError if this is a Powerpoint 97 file, since VBA macros cannot be detected yet | ||
| 1273 | elif zipfile.is_zipfile(_file): | 1293 | elif zipfile.is_zipfile(_file): |
| 1274 | # This looks like a zip file, need to look for vbaProject.bin inside | 1294 | # This looks like a zip file, need to look for vbaProject.bin inside |
| 1275 | # It can be any OLE file inside the archive | 1295 | # It can be any OLE file inside the archive |
| @@ -1279,7 +1299,7 @@ class VBA_Parser(object): | @@ -1279,7 +1299,7 @@ class VBA_Parser(object): | ||
| 1279 | self.type = TYPE_OpenXML | 1299 | self.type = TYPE_OpenXML |
| 1280 | z = zipfile.ZipFile(_file) | 1300 | z = zipfile.ZipFile(_file) |
| 1281 | #TODO: check if this is actually an OpenXML file | 1301 | #TODO: check if this is actually an OpenXML file |
| 1282 | - #TODO: if the zip file is encrypted, suggest to use the -z option, or try '-z infected' automatically? | 1302 | + #TODO: if the zip file is encrypted, suggest to use the -z option, or try '-z infected' automatically |
| 1283 | # check each file within the zip if it is an OLE file, by reading its magic: | 1303 | # check each file within the zip if it is an OLE file, by reading its magic: |
| 1284 | for subfile in z.namelist(): | 1304 | for subfile in z.namelist(): |
| 1285 | magic = z.open(subfile).read(len(olefile.MAGIC)) | 1305 | magic = z.open(subfile).read(len(olefile.MAGIC)) |
| @@ -1297,64 +1317,90 @@ class VBA_Parser(object): | @@ -1297,64 +1317,90 @@ class VBA_Parser(object): | ||
| 1297 | # or a plain text file containing VBA code | 1317 | # or a plain text file containing VBA code |
| 1298 | if data is None: | 1318 | if data is None: |
| 1299 | data = open(filename, 'rb').read() | 1319 | data = open(filename, 'rb').read() |
| 1320 | + # store a lowercase version for some tests: | ||
| 1321 | + data_lowercase = data.lower() | ||
| 1300 | # TODO: move each format parser to a separate method | 1322 | # TODO: move each format parser to a separate method |
| 1301 | # check if it is a Word 2003 XML file (WordProcessingML): must contain the namespace | 1323 | # check if it is a Word 2003 XML file (WordProcessingML): must contain the namespace |
| 1302 | if 'http://schemas.microsoft.com/office/word/2003/wordml' in data: | 1324 | if 'http://schemas.microsoft.com/office/word/2003/wordml' in data: |
| 1303 | logging.info('Opening Word 2003 XML file %s' % self.filename) | 1325 | logging.info('Opening Word 2003 XML file %s' % self.filename) |
| 1304 | - self.type = TYPE_Word2003_XML | ||
| 1305 | - # parse the XML content | ||
| 1306 | - et = ET.fromstring(data) | ||
| 1307 | - # find all the binData elements: | ||
| 1308 | - for bindata in et.getiterator(TAG_BINDATA): | ||
| 1309 | - # the binData content is an OLE container for the VBA project, compressed | ||
| 1310 | - # using the ActiveMime/MSO format (zlib-compressed), and Base64 encoded. | ||
| 1311 | - # get the filename: | ||
| 1312 | - fname = bindata.get(ATTR_NAME, 'noname.mso') | ||
| 1313 | - # decode the base64 activemime | ||
| 1314 | - activemime = binascii.a2b_base64(bindata.text) | ||
| 1315 | - # decompress the zlib data starting at offset 0x32, which is the OLE container: | ||
| 1316 | - ole_data = zlib.decompress(activemime[0x32:]) | ||
| 1317 | - try: | ||
| 1318 | - self.ole_subfiles.append(VBA_Parser(filename=fname, data=ole_data)) | ||
| 1319 | - except: | ||
| 1320 | - logging.debug('%s is not a valid OLE file' % fname) | ||
| 1321 | - continue | ||
| 1322 | - # check if it is a Word 2003 XML file (WordProcessingML): must contain the namespace | ||
| 1323 | - # TODO: check if Word accepts data before the MIME header, if is case-sensitive, etc. | ||
| 1324 | - elif data.lower().startswith('mime-version:'): | ||
| 1325 | - logging.info('Opening Word MHTML file %s' % self.filename) | ||
| 1326 | - self.type = TYPE_MHTML | ||
| 1327 | - # parse the MIME content | ||
| 1328 | - mhtml = email.message_from_string(data) | ||
| 1329 | - # find all the attached files: | ||
| 1330 | - for part in mhtml.walk(): | ||
| 1331 | - content_type = part.get_content_type() # always returns a value | ||
| 1332 | - fname = part.get_filename(None) # returns None if it fails | ||
| 1333 | - logging.debug('MHTML part: filename=%r, content-type=%r' % (fname, content_type)) | ||
| 1334 | - part_data = part.get_payload(decode=True) | ||
| 1335 | - # VBA macros are stored in a binary file named "editdata.mso". | ||
| 1336 | - # the data content is an OLE container for the VBA project, compressed | ||
| 1337 | - # using the ActiveMime/MSO format (zlib-compressed), and Base64 encoded. | ||
| 1338 | - # decompress the zlib data starting at offset 0x32, which is the OLE container: | ||
| 1339 | - try: | ||
| 1340 | - ole_data = zlib.decompress(part_data[0x32:]) | ||
| 1341 | - except: | ||
| 1342 | - logging.debug('%s is not an ActiveMime container' % fname) | ||
| 1343 | - continue | ||
| 1344 | - try: | ||
| 1345 | - self.ole_subfiles.append(VBA_Parser(filename=fname, data=ole_data)) | ||
| 1346 | - except: | ||
| 1347 | - logging.debug('%s is not a valid OLE file' % fname) | ||
| 1348 | - continue | ||
| 1349 | - #TODO: handle exceptions | ||
| 1350 | - #TODO: Excel 2003 XML | ||
| 1351 | - #TODO: plain text VBA file | ||
| 1352 | - else: | ||
| 1353 | - msg = '%s is not an OLE nor an OpenXML file, cannot extract VBA Macros.' % self.filename | ||
| 1354 | - logging.error(msg) | ||
| 1355 | - raise TypeError(msg) | ||
| 1356 | - | ||
| 1357 | - def find_vba_projects (self): | 1326 | + try: |
| 1327 | + # parse the XML content | ||
| 1328 | + # TODO: handle XML parsing exceptions | ||
| 1329 | + et = ET.fromstring(data) | ||
| 1330 | + # set type only if parsing succeeds | ||
| 1331 | + self.type = TYPE_Word2003_XML | ||
| 1332 | + # find all the binData elements: | ||
| 1333 | + for bindata in et.getiterator(TAG_BINDATA): | ||
| 1334 | + # the binData content is an OLE container for the VBA project, compressed | ||
| 1335 | + # using the ActiveMime/MSO format (zlib-compressed), and Base64 encoded. | ||
| 1336 | + # get the filename: | ||
| 1337 | + fname = bindata.get(ATTR_NAME, 'noname.mso') | ||
| 1338 | + # decode the base64 activemime | ||
| 1339 | + activemime = binascii.a2b_base64(bindata.text) | ||
| 1340 | + # decompress the zlib data starting at offset 0x32, which is the OLE container: | ||
| 1341 | + # TODO: handle different offsets => separate function | ||
| 1342 | + ole_data = zlib.decompress(activemime[0x32:]) | ||
| 1343 | + try: | ||
| 1344 | + self.ole_subfiles.append(VBA_Parser(filename=fname, data=ole_data)) | ||
| 1345 | + except: | ||
| 1346 | + logging.debug('%s is not a valid OLE file' % fname) | ||
| 1347 | + continue | ||
| 1348 | + except: | ||
| 1349 | + logging.exception('Failed XML parsing for file %r' % self.filename) | ||
| 1350 | + pass | ||
| 1351 | + # check if it is a MHT file (MIME HTML, Word or Excel saved as "Single File Web Page"): | ||
| 1352 | + # According to my tests, these files usually start with "MIME-Version: 1.0" on the 1st line | ||
| 1353 | + # BUT Word accepts a blank line or other MIME headers inserted before, | ||
| 1354 | + # and even whitespaces in between "MIME", "-", "Version" and ":". The version number is ignored. | ||
| 1355 | + # And the line is case insensitive. | ||
| 1356 | + # so we'll just check the presence of mime, version and multipart anywhere: | ||
| 1357 | + if self.type is None and 'mime' in data_lowercase and 'version' in data_lowercase and 'multipart' in data_lowercase: | ||
| 1358 | + logging.info('Opening MHTML file %s' % self.filename) | ||
| 1359 | + try: | ||
| 1360 | + # parse the MIME content | ||
| 1361 | + # remove any leading whitespace or newline (workaround for issue in email package) | ||
| 1362 | + stripped_data = data.lstrip('\r\n\t ') | ||
| 1363 | + mhtml = email.message_from_string(stripped_data) | ||
| 1364 | + self.type = TYPE_MHTML | ||
| 1365 | + # find all the attached files: | ||
| 1366 | + for part in mhtml.walk(): | ||
| 1367 | + content_type = part.get_content_type() # always returns a value | ||
| 1368 | + fname = part.get_filename(None) # returns None if it fails | ||
| 1369 | + logging.debug('MHTML part: filename=%r, content-type=%r' % (fname, content_type)) | ||
| 1370 | + part_data = part.get_payload(decode=True) | ||
| 1371 | + # VBA macros are stored in a binary file named "editdata.mso". | ||
| 1372 | + # the data content is an OLE container for the VBA project, compressed | ||
| 1373 | + # using the ActiveMime/MSO format (zlib-compressed), and Base64 encoded. | ||
| 1374 | + # decompress the zlib data starting at offset 0x32, which is the OLE container: | ||
| 1375 | + # check ActiveMime header: | ||
| 1376 | + if isinstance(part_data, str) and part_data.startswith(MSO_ACTIVEMIME_HEADER): | ||
| 1377 | + logging.debug('Found ActiveMime header, decompressing MSO container') | ||
| 1378 | + try: | ||
| 1379 | + ole_data = zlib.decompress(part_data[0x32:]) | ||
| 1380 | + try: | ||
| 1381 | + # TODO: check if it is actually an OLE file | ||
| 1382 | + # TODO: get the MSO filename from content_location? | ||
| 1383 | + self.ole_subfiles.append(VBA_Parser(filename=fname, data=ole_data)) | ||
| 1384 | + except: | ||
| 1385 | + logging.debug('%s is not a valid OLE file' % fname) | ||
| 1386 | + except: | ||
| 1387 | + logging.error('Failed decompressing an MSO container in %r - %s' | ||
| 1388 | + % (fname, MSG_OLEVBA_ISSUES)) | ||
| 1389 | + # TODO: bug here - need to split in smaller functions/classes? | ||
| 1390 | + except: | ||
| 1391 | + logging.exception('Failed MIME parsing for file %r - %s' | ||
| 1392 | + % (self.filename, MSG_OLEVBA_ISSUES)) | ||
| 1393 | + pass | ||
| 1394 | + | ||
| 1395 | + #TODO: handle exceptions | ||
| 1396 | + #TODO: Excel 2003 XML | ||
| 1397 | + #TODO: plain text VBA file | ||
| 1398 | + if self.type is None: | ||
| 1399 | + msg = '%s is not a supported file type, cannot extract VBA Macros.' % self.filename | ||
| 1400 | + logging.error(msg) | ||
| 1401 | + raise TypeError(msg) | ||
| 1402 | + | ||
| 1403 | + def find_vba_projects(self): | ||
| 1358 | """ | 1404 | """ |
| 1359 | Finds all the VBA projects stored in an OLE file. | 1405 | Finds all the VBA projects stored in an OLE file. |
| 1360 | 1406 | ||
| @@ -1465,7 +1511,7 @@ class VBA_Parser(object): | @@ -1465,7 +1511,7 @@ class VBA_Parser(object): | ||
| 1465 | return True | 1511 | return True |
| 1466 | 1512 | ||
| 1467 | 1513 | ||
| 1468 | - def extract_macros (self): | 1514 | + def extract_macros(self): |
| 1469 | """ | 1515 | """ |
| 1470 | Extract and decompress source code for each VBA macro found in the file | 1516 | Extract and decompress source code for each VBA macro found in the file |
| 1471 | 1517 | ||
| @@ -1482,7 +1528,8 @@ class VBA_Parser(object): | @@ -1482,7 +1528,8 @@ class VBA_Parser(object): | ||
| 1482 | self.find_vba_projects() | 1528 | self.find_vba_projects() |
| 1483 | for vba_root, project_path, dir_path in self.vba_projects: | 1529 | for vba_root, project_path, dir_path in self.vba_projects: |
| 1484 | # extract all VBA macros from that VBA root storage: | 1530 | # extract all VBA macros from that VBA root storage: |
| 1485 | - for stream_path, vba_filename, vba_code in _extract_vba(self.ole_file, vba_root, project_path, dir_path): | 1531 | + for stream_path, vba_filename, vba_code in _extract_vba(self.ole_file, vba_root, project_path, |
| 1532 | + dir_path): | ||
| 1486 | yield (self.filename, stream_path, vba_filename, vba_code) | 1533 | yield (self.filename, stream_path, vba_filename, vba_code) |
| 1487 | 1534 | ||
| 1488 | 1535 | ||
| @@ -1520,8 +1567,7 @@ def print_analysis(vba_code, show_decoded_strings=False): | @@ -1520,8 +1567,7 @@ def print_analysis(vba_code, show_decoded_strings=False): | ||
| 1520 | print 'No suspicious keyword or IOC found.' | 1567 | print 'No suspicious keyword or IOC found.' |
| 1521 | 1568 | ||
| 1522 | 1569 | ||
| 1523 | - | ||
| 1524 | -def process_file (container, filename, data, show_decoded_strings=False): | 1570 | +def process_file(container, filename, data, show_decoded_strings=False): |
| 1525 | """ | 1571 | """ |
| 1526 | Process a single file | 1572 | Process a single file |
| 1527 | 1573 | ||
| @@ -1536,7 +1582,7 @@ def process_file (container, filename, data, show_decoded_strings=False): | @@ -1536,7 +1582,7 @@ def process_file (container, filename, data, show_decoded_strings=False): | ||
| 1536 | display_filename = '%s in %s' % (filename, container) | 1582 | display_filename = '%s in %s' % (filename, container) |
| 1537 | else: | 1583 | else: |
| 1538 | display_filename = filename | 1584 | display_filename = filename |
| 1539 | - print '='*79 | 1585 | + print '=' * 79 |
| 1540 | print 'FILE:', display_filename | 1586 | print 'FILE:', display_filename |
| 1541 | try: | 1587 | try: |
| 1542 | #TODO: handle olefile errors, when an OLE file is malformed | 1588 | #TODO: handle olefile errors, when an OLE file is malformed |
| @@ -1548,22 +1594,22 @@ def process_file (container, filename, data, show_decoded_strings=False): | @@ -1548,22 +1594,22 @@ def process_file (container, filename, data, show_decoded_strings=False): | ||
| 1548 | # hide attribute lines: | 1594 | # hide attribute lines: |
| 1549 | #TODO: option to disable attribute filtering | 1595 | #TODO: option to disable attribute filtering |
| 1550 | vba_code_filtered = filter_vba(vba_code) | 1596 | vba_code_filtered = filter_vba(vba_code) |
| 1551 | - print '-'*79 | 1597 | + print '-' * 79 |
| 1552 | print 'VBA MACRO %s ' % vba_filename | 1598 | print 'VBA MACRO %s ' % vba_filename |
| 1553 | print 'in file: %s - OLE stream: %s' % (subfilename, repr(stream_path)) | 1599 | print 'in file: %s - OLE stream: %s' % (subfilename, repr(stream_path)) |
| 1554 | - print '- '*39 | 1600 | + print '- ' * 39 |
| 1555 | # detect empty macros: | 1601 | # detect empty macros: |
| 1556 | if vba_code_filtered.strip() == '': | 1602 | if vba_code_filtered.strip() == '': |
| 1557 | print '(empty macro)' | 1603 | print '(empty macro)' |
| 1558 | else: | 1604 | else: |
| 1559 | print vba_code_filtered | 1605 | print vba_code_filtered |
| 1560 | - print '- '*39 | 1606 | + print '- ' * 39 |
| 1561 | print 'ANALYSIS:' | 1607 | print 'ANALYSIS:' |
| 1562 | # analyse the whole code, filtered to avoid false positives: | 1608 | # analyse the whole code, filtered to avoid false positives: |
| 1563 | print_analysis(vba_code_filtered, show_decoded_strings) | 1609 | print_analysis(vba_code_filtered, show_decoded_strings) |
| 1564 | else: | 1610 | else: |
| 1565 | print 'No VBA macros found.' | 1611 | print 'No VBA macros found.' |
| 1566 | - except: #TypeError: | 1612 | + except: #TypeError: |
| 1567 | #raise | 1613 | #raise |
| 1568 | #TODO: print more info if debug mode | 1614 | #TODO: print more info if debug mode |
| 1569 | #print sys.exc_value | 1615 | #print sys.exc_value |
| @@ -1572,7 +1618,7 @@ def process_file (container, filename, data, show_decoded_strings=False): | @@ -1572,7 +1618,7 @@ def process_file (container, filename, data, show_decoded_strings=False): | ||
| 1572 | print '' | 1618 | print '' |
| 1573 | 1619 | ||
| 1574 | 1620 | ||
| 1575 | -def process_file_triage (container, filename, data): | 1621 | +def process_file_triage(container, filename, data): |
| 1576 | """ | 1622 | """ |
| 1577 | Process a single file | 1623 | Process a single file |
| 1578 | 1624 | ||
| @@ -1624,7 +1670,7 @@ def process_file_triage (container, filename, data): | @@ -1624,7 +1670,7 @@ def process_file_triage (container, filename, data): | ||
| 1624 | if nb_base64strings: base64obf = 'B' | 1670 | if nb_base64strings: base64obf = 'B' |
| 1625 | if nb_dridexstrings: dridex = 'D' | 1671 | if nb_dridexstrings: dridex = 'D' |
| 1626 | flags += '%s%s%s%s%s%s%s' % (macros, autoexec, suspicious, iocs, hexstrings, | 1672 | flags += '%s%s%s%s%s%s%s' % (macros, autoexec, suspicious, iocs, hexstrings, |
| 1627 | - base64obf, dridex) | 1673 | + base64obf, dridex) |
| 1628 | 1674 | ||
| 1629 | # macros = autoexec = suspicious = iocs = hexstrings = 'no' | 1675 | # macros = autoexec = suspicious = iocs = hexstrings = 'no' |
| 1630 | # if nb_macros: macros = 'YES:%d' % nb_macros | 1676 | # if nb_macros: macros = 'YES:%d' % nb_macros |
| @@ -1663,9 +1709,11 @@ def process_file_triage (container, filename, data): | @@ -1663,9 +1709,11 @@ def process_file_triage (container, filename, data): | ||
| 1663 | # t.add_row((filename, ftype, macros, autoexec, suspicious, iocs, hexstrings)) | 1709 | # t.add_row((filename, ftype, macros, autoexec, suspicious, iocs, hexstrings)) |
| 1664 | # print t | 1710 | # print t |
| 1665 | 1711 | ||
| 1712 | + | ||
| 1666 | def main_triage_quick(): | 1713 | def main_triage_quick(): |
| 1667 | pass | 1714 | pass |
| 1668 | 1715 | ||
| 1716 | + | ||
| 1669 | #=== MAIN ===================================================================== | 1717 | #=== MAIN ===================================================================== |
| 1670 | 1718 | ||
| 1671 | def main(): | 1719 | def main(): |
| @@ -1679,19 +1727,19 @@ def main(): | @@ -1679,19 +1727,19 @@ def main(): | ||
| 1679 | # parser.add_option('-c', '--csv', dest='csv', | 1727 | # parser.add_option('-c', '--csv', dest='csv', |
| 1680 | # help='export results to a CSV file') | 1728 | # help='export results to a CSV file') |
| 1681 | parser.add_option("-r", action="store_true", dest="recursive", | 1729 | parser.add_option("-r", action="store_true", dest="recursive", |
| 1682 | - help='find files recursively in subdirectories.') | 1730 | + help='find files recursively in subdirectories.') |
| 1683 | parser.add_option("-z", "--zip", dest='zip_password', type='str', default=None, | 1731 | parser.add_option("-z", "--zip", dest='zip_password', type='str', default=None, |
| 1684 | - help='if the file is a zip archive, open first file from it, using the provided password (requires Python 2.6+)') | 1732 | + help='if the file is a zip archive, open first file from it, using the provided password (requires Python 2.6+)') |
| 1685 | parser.add_option("-f", "--zipfname", dest='zip_fname', type='str', default='*', | 1733 | parser.add_option("-f", "--zipfname", dest='zip_fname', type='str', default='*', |
| 1686 | - help='if the file is a zip archive, file(s) to be opened within the zip. Wildcards * and ? are supported. (default:*)') | 1734 | + help='if the file is a zip archive, file(s) to be opened within the zip. Wildcards * and ? are supported. (default:*)') |
| 1687 | parser.add_option("-t", action="store_true", dest="triage_mode", | 1735 | parser.add_option("-t", action="store_true", dest="triage_mode", |
| 1688 | - help='triage mode, display results as a summary table (default for multiple files)') | 1736 | + help='triage mode, display results as a summary table (default for multiple files)') |
| 1689 | parser.add_option("-d", action="store_true", dest="detailed_mode", | 1737 | parser.add_option("-d", action="store_true", dest="detailed_mode", |
| 1690 | - help='detailed mode, display full results (default for single file)') | 1738 | + help='detailed mode, display full results (default for single file)') |
| 1691 | parser.add_option("-i", "--input", dest='input', type='str', default=None, | 1739 | parser.add_option("-i", "--input", dest='input', type='str', default=None, |
| 1692 | - help='input file containing VBA source code to be analyzed (no parsing)') | 1740 | + help='input file containing VBA source code to be analyzed (no parsing)') |
| 1693 | parser.add_option("--decode", action="store_true", dest="show_decoded_strings", | 1741 | parser.add_option("--decode", action="store_true", dest="show_decoded_strings", |
| 1694 | - help='display all the obfuscated strings with their decoded content (Hex, Base64, StrReverse, Dridex).') | 1742 | + help='display all the obfuscated strings with their decoded content (Hex, Base64, StrReverse, Dridex).') |
| 1695 | 1743 | ||
| 1696 | (options, args) = parser.parse_args() | 1744 | (options, args) = parser.parse_args() |
| 1697 | 1745 | ||
| @@ -1705,9 +1753,9 @@ def main(): | @@ -1705,9 +1753,9 @@ def main(): | ||
| 1705 | print 'olevba %s - http://decalage.info/python/oletools' % __version__ | 1753 | print 'olevba %s - http://decalage.info/python/oletools' % __version__ |
| 1706 | 1754 | ||
| 1707 | # TODO: option to set logging level, none by default | 1755 | # TODO: option to set logging level, none by default |
| 1708 | - logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.WARNING) #INFO) | 1756 | + logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.DEBUG) #.WARNING) #INFO) |
| 1709 | # For now, all logging is disabled: | 1757 | # For now, all logging is disabled: |
| 1710 | - logging.disable(logging.CRITICAL) | 1758 | + #logging.disable(logging.CRITICAL) |
| 1711 | 1759 | ||
| 1712 | if options.input: | 1760 | if options.input: |
| 1713 | # input file provided with VBA source code to be analyzed directly: | 1761 | # input file provided with VBA source code to be analyzed directly: |
| @@ -1720,12 +1768,12 @@ def main(): | @@ -1720,12 +1768,12 @@ def main(): | ||
| 1720 | # print '%-8s %-7s %-7s %-7s %-7s %-7s' % ('-'*8, '-'*7, '-'*7, '-'*7, '-'*7, '-'*7) | 1768 | # print '%-8s %-7s %-7s %-7s %-7s %-7s' % ('-'*8, '-'*7, '-'*7, '-'*7, '-'*7, '-'*7) |
| 1721 | if not options.detailed_mode or options.triage_mode: | 1769 | if not options.detailed_mode or options.triage_mode: |
| 1722 | print '%-11s %-65s' % ('Flags', 'Filename') | 1770 | print '%-11s %-65s' % ('Flags', 'Filename') |
| 1723 | - print '%-11s %-65s' % ('-'*11, '-'*65) | 1771 | + print '%-11s %-65s' % ('-' * 11, '-' * 65) |
| 1724 | previous_container = None | 1772 | previous_container = None |
| 1725 | count = 0 | 1773 | count = 0 |
| 1726 | container = filename = data = None | 1774 | container = filename = data = None |
| 1727 | for container, filename, data in xglob.iter_files(args, recursive=options.recursive, | 1775 | for container, filename, data in xglob.iter_files(args, recursive=options.recursive, |
| 1728 | - zip_password=options.zip_password, zip_fname=options.zip_fname): | 1776 | + zip_password=options.zip_password, zip_fname=options.zip_fname): |
| 1729 | # ignore directory names stored in zip files: | 1777 | # ignore directory names stored in zip files: |
| 1730 | if container and filename.endswith('/'): | 1778 | if container and filename.endswith('/'): |
| 1731 | continue | 1779 | continue |
| @@ -1749,7 +1797,8 @@ def main(): | @@ -1749,7 +1797,8 @@ def main(): | ||
| 1749 | #TODO: avoid doing the analysis twice by storing results | 1797 | #TODO: avoid doing the analysis twice by storing results |
| 1750 | process_file(container, filename, data, show_decoded_strings=options.show_decoded_strings) | 1798 | process_file(container, filename, data, show_decoded_strings=options.show_decoded_strings) |
| 1751 | 1799 | ||
| 1800 | + | ||
| 1752 | if __name__ == '__main__': | 1801 | if __name__ == '__main__': |
| 1753 | main() | 1802 | main() |
| 1754 | 1803 | ||
| 1755 | -# This was coded while listening to "Dust" from I Love You But I've Chosen Darkness | ||
| 1756 | \ No newline at end of file | 1804 | \ No newline at end of file |
| 1805 | + # This was coded while listening to "Dust" from I Love You But I've Chosen Darkness | ||
| 1757 | \ No newline at end of file | 1806 | \ No newline at end of file |