Commit bdad8c146df8e66815562272b6a00f980a359e0e

Authored by Philippe Lagadec
1 parent fad632c5

improved support for MHTML files with modified header: fixed issue #11

Showing 1 changed file with 217 additions and 168 deletions
oletools/olevba.py
... ... @@ -23,7 +23,7 @@ olevba is based on source code from officeparser by John William Davison
23 23 https://github.com/unixfreak0037/officeparser
24 24 """
25 25  
26   -#=== LICENSE ==================================================================
  26 +# === LICENSE ==================================================================
27 27  
28 28 # olevba is copyright (c) 2014-2015 Philippe Lagadec (http://www.decalage.info)
29 29 # All rights reserved.
... ... @@ -130,8 +130,10 @@ https://github.com/unixfreak0037/officeparser
130 130 # virtualisation detection
131 131 # 2015-05-06 v0.27 PL: - added support for MHTML files with VBA macros
132 132 # (issue #10 reported by Greg from SpamStopsHere)
  133 +# 2015-05-24 v0.28 PL: - improved support for MHTML files with modified header
  134 +# (issue #11 reported by Thomas Chopitea)
133 135  
134   -__version__ = '0.27'
  136 +__version__ = '0.28'
135 137  
136 138 #------------------------------------------------------------------------------
137 139 # TODO:
... ... @@ -178,7 +180,7 @@ import binascii
178 180 import base64
179 181 import traceback
180 182 import zlib
181   -import email # for MHTML parsing
  183 +import email # for MHTML parsing
182 184  
183 185 # import lxml or ElementTree for XML parsing:
184 186 try:
... ... @@ -193,9 +195,9 @@ except ImportError:
193 195 # Python <2.5: standalone ElementTree install
194 196 import elementtree.cElementTree as ET
195 197 except ImportError:
196   - raise ImportError, "lxml or ElementTree are not installed, "\
197   - +"see http://codespeak.net/lxml "\
198   - +"or http://effbot.org/zone/element-index.htm"
  198 + raise ImportError, "lxml or ElementTree are not installed, " \
  199 + + "see http://codespeak.net/lxml " \
  200 + + "or http://effbot.org/zone/element-index.htm"
199 201  
200 202 import thirdparty.olefile as olefile
201 203 from thirdparty.prettytable import prettytable
... ... @@ -203,12 +205,19 @@ from thirdparty.xglob import xglob
203 205  
204 206 #--- CONSTANTS ----------------------------------------------------------------
205 207  
  208 +# URL and message to report issues:
  209 +URL_OLEVBA_ISSUES = 'https://bitbucket.org/decalage/oletools/issues'
  210 +MSG_OLEVBA_ISSUES = 'Please report this issue on %s' % URL_OLEVBA_ISSUES
  211 +
206 212 # Container types:
207   -TYPE_OLE = 'OLE'
  213 +TYPE_OLE = 'OLE'
208 214 TYPE_OpenXML = 'OpenXML'
209 215 TYPE_Word2003_XML = 'Word2003_XML'
210 216 TYPE_MHTML = 'MHTML'
211 217  
  218 +# MSO files ActiveMime header magic
  219 +MSO_ACTIVEMIME_HEADER = 'ActiveMime'
  220 +
212 221 MODULE_EXTENSION = "bas"
213 222 CLASS_EXTENSION = "cls"
214 223 FORM_EXTENSION = "frm"
... ... @@ -249,28 +258,28 @@ SUSPICIOUS_KEYWORDS = {
249 258 'May open a file':
250 259 ('Open',),
251 260 'May write to a file (if combined with Open)':
252   - #TODO: regex to find Open+Write on same line
  261 + #TODO: regex to find Open+Write on same line
253 262 ('Write', 'Put', 'Output', 'Print #'),
254 263 'May read or write a binary file (if combined with Open)':
255   - #TODO: regex to find Open+Binary on same line
  264 + #TODO: regex to find Open+Binary on same line
256 265 ('Binary',),
257 266 'May copy a file':
258 267 ('FileCopy', 'CopyFile'),
259   - #FileCopy: http://msdn.microsoft.com/en-us/library/office/gg264390%28v=office.15%29.aspx
260   - #CopyFile: http://msdn.microsoft.com/en-us/library/office/gg264089%28v=office.15%29.aspx
  268 + #FileCopy: http://msdn.microsoft.com/en-us/library/office/gg264390%28v=office.15%29.aspx
  269 + #CopyFile: http://msdn.microsoft.com/en-us/library/office/gg264089%28v=office.15%29.aspx
261 270 'May delete a file':
262 271 ('Kill',),
263 272 'May create a text file':
264   - ('CreateTextFile','ADODB.Stream', 'WriteText', 'SaveToFile'),
265   - #CreateTextFile: http://msdn.microsoft.com/en-us/library/office/gg264617%28v=office.15%29.aspx
266   - #ADODB.Stream sample: http://pastebin.com/Z4TMyuq6
  273 + ('CreateTextFile', 'ADODB.Stream', 'WriteText', 'SaveToFile'),
  274 + #CreateTextFile: http://msdn.microsoft.com/en-us/library/office/gg264617%28v=office.15%29.aspx
  275 + #ADODB.Stream sample: http://pastebin.com/Z4TMyuq6
267 276 'May run an executable file or a system command':
268 277 ('Shell', 'vbNormal', 'vbNormalFocus', 'vbHide', 'vbMinimizedFocus', 'vbMaximizedFocus', 'vbNormalNoFocus',
269 278 'vbMinimizedNoFocus', 'WScript.Shell', 'Run'),
270   - #Shell: http://msdn.microsoft.com/en-us/library/office/gg278437%28v=office.15%29.aspx
271   - #WScript.Shell+Run sample: http://pastebin.com/Z4TMyuq6
  279 + #Shell: http://msdn.microsoft.com/en-us/library/office/gg278437%28v=office.15%29.aspx
  280 + #WScript.Shell+Run sample: http://pastebin.com/Z4TMyuq6
272 281 'May run PowerShell commands':
273   - #sample: https://malwr.com/analysis/M2NjZWNmMjA0YjVjNGVhYmJlZmFhNWY4NmQxZDllZTY/
  282 + #sample: https://malwr.com/analysis/M2NjZWNmMjA0YjVjNGVhYmJlZmFhNWY4NmQxZDllZTY/
274 283 ('PowerShell', ),
275 284 'May hide the application':
276 285 ('Application.Visible', 'ShowWindow', 'SW_HIDE'),
... ... @@ -279,7 +288,7 @@ SUSPICIOUS_KEYWORDS = {
279 288 'May save the current workbook':
280 289 ('ActiveWorkbook.SaveAs',),
281 290 'May change which directory contains files to open at startup':
282   - #TODO: confirm the actual effect
  291 + #TODO: confirm the actual effect
283 292 ('Application.AltStartupPath',),
284 293 'May create an OLE object':
285 294 ('CreateObject',),
... ... @@ -288,58 +297,58 @@ SUSPICIOUS_KEYWORDS = {
288 297 'May enumerate application windows (if combined with Shell.Application object)':
289 298 ('Windows', 'FindWindow'),
290 299 'May run code from a DLL':
291   - #TODO: regex to find declare+lib on same line
  300 + #TODO: regex to find declare+lib on same line
292 301 ('Lib',),
293 302 'May download files from the Internet':
294   - #TODO: regex to find urlmon+URLDownloadToFileA on same line
  303 + #TODO: regex to find urlmon+URLDownloadToFileA on same line
295 304 ('URLDownloadToFileA', 'Msxml2.XMLHTTP', 'Microsoft.XMLHTTP'),
296 305 'May download files from the Internet using PowerShell':
297   - #sample: https://malwr.com/analysis/M2NjZWNmMjA0YjVjNGVhYmJlZmFhNWY4NmQxZDllZTY/
  306 + #sample: https://malwr.com/analysis/M2NjZWNmMjA0YjVjNGVhYmJlZmFhNWY4NmQxZDllZTY/
298 307 ('New-Object System.Net.WebClient', 'DownloadFile'),
299 308 'May control another application by simulating user keystrokes':
300 309 ('SendKeys', 'AppActivate'),
301   - #SendKeys: http://msdn.microsoft.com/en-us/library/office/gg278655%28v=office.15%29.aspx
  310 + #SendKeys: http://msdn.microsoft.com/en-us/library/office/gg278655%28v=office.15%29.aspx
302 311 'May attempt to obfuscate malicious function calls':
303 312 ('CallByName',),
304   - #CallByName: http://msdn.microsoft.com/en-us/library/office/gg278760%28v=office.15%29.aspx
  313 + #CallByName: http://msdn.microsoft.com/en-us/library/office/gg278760%28v=office.15%29.aspx
305 314 'May attempt to obfuscate specific strings':
306   - #TODO: regex to find several Chr*, not just one
  315 + #TODO: regex to find several Chr*, not just one
307 316 ('Chr', 'ChrB', 'ChrW', 'StrReverse', 'Xor'),
308   - #Chr: http://msdn.microsoft.com/en-us/library/office/gg264465%28v=office.15%29.aspx
  317 + #Chr: http://msdn.microsoft.com/en-us/library/office/gg264465%28v=office.15%29.aspx
309 318 'May read or write registry keys':
310   - #sample: https://malwr.com/analysis/M2NjZWNmMjA0YjVjNGVhYmJlZmFhNWY4NmQxZDllZTY/
  319 + #sample: https://malwr.com/analysis/M2NjZWNmMjA0YjVjNGVhYmJlZmFhNWY4NmQxZDllZTY/
311 320 ('RegOpenKeyExA', 'RegOpenKeyEx', 'RegCloseKey'),
312 321 'May read registry keys':
313   - #sample: https://malwr.com/analysis/M2NjZWNmMjA0YjVjNGVhYmJlZmFhNWY4NmQxZDllZTY/
  322 + #sample: https://malwr.com/analysis/M2NjZWNmMjA0YjVjNGVhYmJlZmFhNWY4NmQxZDllZTY/
314 323 ('RegQueryValueExA', 'RegQueryValueEx',
315 324 'RegRead', #with Wscript.Shell
316   - ),
  325 + ),
317 326 'May detect virtualization':
318   - # sample: https://malwr.com/analysis/M2NjZWNmMjA0YjVjNGVhYmJlZmFhNWY4NmQxZDllZTY/
  327 + # sample: https://malwr.com/analysis/M2NjZWNmMjA0YjVjNGVhYmJlZmFhNWY4NmQxZDllZTY/
319 328 (r'SYSTEM\ControlSet001\Services\Disk\Enum', 'VIRTUAL', 'VMWARE', 'VBOX'),
320 329 'May detect Anubis Sandbox':
321   - # sample: https://malwr.com/analysis/M2NjZWNmMjA0YjVjNGVhYmJlZmFhNWY4NmQxZDllZTY/
322   - # NOTES: this sample also checks App.EXEName but that seems to be a bug, it works in VB6 but not in VBA
323   - # ref: http://www.syssec-project.eu/m/page-media/3/disarm-raid11.pdf
324   - ('GetVolumeInformationA', 'GetVolumeInformation', # with kernel32.dll
  330 + # sample: https://malwr.com/analysis/M2NjZWNmMjA0YjVjNGVhYmJlZmFhNWY4NmQxZDllZTY/
  331 + # NOTES: this sample also checks App.EXEName but that seems to be a bug, it works in VB6 but not in VBA
  332 + # ref: http://www.syssec-project.eu/m/page-media/3/disarm-raid11.pdf
  333 + ('GetVolumeInformationA', 'GetVolumeInformation', # with kernel32.dll
325 334 '1824245000', r'HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Windows NT\CurrentVersion\ProductId',
326 335 '76487-337-8429955-22614', 'andy', 'sample', r'C:\exec\exec.exe', 'popupkiller'
327 336 ),
328 337 'May detect Sandboxie':
329   - # sample: https://malwr.com/analysis/M2NjZWNmMjA0YjVjNGVhYmJlZmFhNWY4NmQxZDllZTY/
330   - # ref: http://www.cplusplus.com/forum/windows/96874/
  338 + # sample: https://malwr.com/analysis/M2NjZWNmMjA0YjVjNGVhYmJlZmFhNWY4NmQxZDllZTY/
  339 + # ref: http://www.cplusplus.com/forum/windows/96874/
331 340 ('SbieDll.dll', 'SandboxieControlWndClass'),
332 341 'May detect Sunbelt Sandbox':
333   - # ref: http://www.cplusplus.com/forum/windows/96874/
  342 + # ref: http://www.cplusplus.com/forum/windows/96874/
334 343 (r'C:\file.exe',),
335 344 'May detect Norman Sandbox':
336   - # ref: http://www.cplusplus.com/forum/windows/96874/
  345 + # ref: http://www.cplusplus.com/forum/windows/96874/
337 346 ('currentuser',),
338 347 'May detect CW Sandbox':
339   - # ref: http://www.cplusplus.com/forum/windows/96874/
  348 + # ref: http://www.cplusplus.com/forum/windows/96874/
340 349 ('Schmidti',),
341 350 'May detect WinJail Sandbox':
342   - # ref: http://www.cplusplus.com/forum/windows/96874/
  351 + # ref: http://www.cplusplus.com/forum/windows/96874/
343 352 ('Afx:400000:0',),
344 353 }
345 354  
... ... @@ -355,12 +364,12 @@ DNS_NAME = r'(?:[a-zA-Z0-9\-\.]+\.' + TLD + ')'
355 364 #TODO: IPv6 - see https://www.debuggex.com/
356 365 # A literal numeric IPv6 address may be given, but must be enclosed in [ ] e.g. [db8:0cec::99:123a]
357 366 NUMBER_0_255 = r'(?:25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9][0-9]|[0-9])'
358   -IPv4 = r'(?:'+NUMBER_0_255+r'\.){3}'+NUMBER_0_255
  367 +IPv4 = r'(?:' + NUMBER_0_255 + r'\.){3}' + NUMBER_0_255
359 368 # IPv4 must come before the DNS name because it is more specific
360 369 SERVER = r'(?:' + IPv4 + '|' + DNS_NAME + ')'
361 370 PORT = r'(?:\:[0-9]{1,5})?'
362 371 SERVER_PORT = SERVER + PORT
363   -URL_PATH = r'(?:/[a-zA-Z0-9\-\._\?\,\'/\\\+&%\$#\=~]*)?' # [^\.\,\)\(\s"]
  372 +URL_PATH = r'(?:/[a-zA-Z0-9\-\._\?\,\'/\\\+&%\$#\=~]*)?' # [^\.\,\)\(\s"]
364 373 URL_RE = SCHEME + r'\://' + SERVER_PORT + URL_PATH
365 374 re_url = re.compile(URL_RE)
366 375  
... ... @@ -370,14 +379,15 @@ re_url = re.compile(URL_RE)
370 379 RE_PATTERNS = (
371 380 ('URL', re.compile(URL_RE)),
372 381 ('IPv4 address', re.compile(IPv4)),
373   - ('E-mail address', re.compile(r'(?i)\b[A-Z0-9._%+-]+@'+SERVER+'\b')),
  382 + ('E-mail address', re.compile(r'(?i)\b[A-Z0-9._%+-]+@' + SERVER + '\b')),
374 383 # ('Domain name', re.compile(r'(?=^.{1,254}$)(^(?:(?!\d+\.|-)[a-zA-Z0-9_\-]{1,63}(?<!-)\.?)+(?:[a-zA-Z]{2,})$)')),
375 384 # Executable file name with known extensions (except .com which is present in many URLs, and .application):
376   - ("Executable file name", re.compile(r"(?i)\b\w+\.(EXE|PIF|GADGET|MSI|MSP|MSC|VBS|VBE|VB|JSE|JS|WSF|WSC|WSH|WS|BAT|CMD|DLL|SCR|HTA|CPL|CLASS|JAR|PS1XML|PS1|PS2XML|PS2|PSC1|PSC2|SCF|LNK|INF|REG)\b")),
  385 + ("Executable file name", re.compile(
  386 + r"(?i)\b\w+\.(EXE|PIF|GADGET|MSI|MSP|MSC|VBS|VBE|VB|JSE|JS|WSF|WSC|WSH|WS|BAT|CMD|DLL|SCR|HTA|CPL|CLASS|JAR|PS1XML|PS1|PS2XML|PS2|PSC1|PSC2|SCF|LNK|INF|REG)\b")),
377 387 # Sources: http://www.howtogeek.com/137270/50-file-extensions-that-are-potentially-dangerous-on-windows/
378 388 #TODO: https://support.office.com/en-us/article/Blocked-attachments-in-Outlook-3811cddc-17c3-4279-a30c-060ba0207372#__attachment_file_types
379 389 #('Hex string', re.compile(r'(?:[0-9A-Fa-f]{2}){4,}')),
380   - )
  390 +)
381 391  
382 392 # regex to detect strings encoded in hexadecimal
383 393 re_hex_string = re.compile(r'(?:[0-9A-Fa-f]{2}){4,}')
... ... @@ -385,7 +395,8 @@ re_hex_string = re.compile(r'(?:[0-9A-Fa-f]{2}){4,}')
385 395 # regex to detect strings encoded in base64
386 396 #re_base64_string = re.compile(r'"(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)?"')
387 397 # better version from balbuzard, less false positives:
388   -re_base64_string = re.compile(r'"(?:[A-Za-z0-9+/]{4}){1,}(?:[A-Za-z0-9+/]{2}[AEIMQUYcgkosw048]=|[A-Za-z0-9+/][AQgw]==)?"')
  398 +re_base64_string = re.compile(
  399 + r'"(?:[A-Za-z0-9+/]{4}){1,}(?:[A-Za-z0-9+/]{2}[AEIMQUYcgkosw048]=|[A-Za-z0-9+/][AQgw]==)?"')
389 400 # white list of common strings matching the base64 regex, but which are not base64 strings (all lowercase):
390 401 BASE64_WHITELIST = set(['thisdocument', 'thisworkbook', 'test', 'temp', 'http', 'open', 'exit'])
391 402  
... ... @@ -414,7 +425,7 @@ def copytoken_help(decompressed_current, decompressed_chunk_start):
414 425 return length_mask, offset_mask, bit_count, maximum_length
415 426  
416 427  
417   -def decompress_stream (compressed_container):
  428 +def decompress_stream(compressed_container):
418 429 """
419 430 Decompress a stream according to MS-OVBA section 2.4.1
420 431  
... ... @@ -456,7 +467,8 @@ def decompress_stream (compressed_container):
456 467 # 2.4.1.1.5
457 468 compressed_chunk_start = compressed_current
458 469 # chunk header = first 16 bits
459   - compressed_chunk_header = struct.unpack("<H", compressed_container[compressed_chunk_start:compressed_chunk_start + 2])[0]
  470 + compressed_chunk_header = \
  471 + struct.unpack("<H", compressed_container[compressed_chunk_start:compressed_chunk_start + 2])[0]
460 472 # chunk size = 12 first bits of header + 3
461 473 chunk_size = (compressed_chunk_header & 0x0FFF) + 3
462 474 # chunk signature = 3 next bits - should always be 0b011
... ... @@ -510,13 +522,14 @@ def decompress_stream (compressed_container):
510 522 # MS-OVBA 2.4.1.3.17 Extract FlagBit
511 523 flag_bit = (flag_byte >> bit_index) & 1
512 524 #logging.debug('bit_index=%d: flag_bit=%d' % (bit_index, flag_bit))
513   - if flag_bit == 0: # LiteralToken
  525 + if flag_bit == 0: # LiteralToken
514 526 # copy one byte directly to output
515 527 decompressed_container += compressed_container[compressed_current]
516 528 compressed_current += 1
517   - else: # CopyToken
  529 + else: # CopyToken
518 530 # MS-OVBA 2.4.1.3.19.2 Unpack CopyToken
519   - copy_token = struct.unpack("<H", compressed_container[compressed_current:compressed_current + 2])[0]
  531 + copy_token = \
  532 + struct.unpack("<H", compressed_container[compressed_current:compressed_current + 2])[0]
520 533 #TODO: check this
521 534 length_mask, offset_mask, bit_count, maximum_length = copytoken_help(
522 535 len(decompressed_container), decompressed_chunk_start)
... ... @@ -532,7 +545,7 @@ def decompress_stream (compressed_container):
532 545 return decompressed_container
533 546  
534 547  
535   -def _extract_vba (ole, vba_root, project_path, dir_path):
  548 +def _extract_vba(ole, vba_root, project_path, dir_path):
536 549 """
537 550 Extract VBA macros from an OleFileIO object.
538 551 Internal function, do not call directly.
... ... @@ -649,7 +662,8 @@ def _extract_vba (ole, vba_root, project_path, dir_path):
649 662 check_value('PROJECTDOCSTRING_Id', 0x0005, PROJECTDOCSTRING_Id)
650 663 PROJECTDOCSTRING_SizeOfDocString = struct.unpack("<L", dir_stream.read(4))[0]
651 664 if PROJECTNAME_SizeOfProjectName > 2000:
652   - logging.error("PROJECTDOCSTRING_SizeOfDocString value not in range: {0}".format(PROJECTDOCSTRING_SizeOfDocString))
  665 + logging.error(
  666 + "PROJECTDOCSTRING_SizeOfDocString value not in range: {0}".format(PROJECTDOCSTRING_SizeOfDocString))
653 667 PROJECTDOCSTRING_DocString = dir_stream.read(PROJECTDOCSTRING_SizeOfDocString)
654 668 PROJECTDOCSTRING_Reserved = struct.unpack("<H", dir_stream.read(2))[0]
655 669 check_value('PROJECTDOCSTRING_Reserved', 0x0040, PROJECTDOCSTRING_Reserved)
... ... @@ -663,7 +677,8 @@ def _extract_vba (ole, vba_root, project_path, dir_path):
663 677 check_value('PROJECTHELPFILEPATH_Id', 0x0006, PROJECTHELPFILEPATH_Id)
664 678 PROJECTHELPFILEPATH_SizeOfHelpFile1 = struct.unpack("<L", dir_stream.read(4))[0]
665 679 if PROJECTHELPFILEPATH_SizeOfHelpFile1 > 260:
666   - logging.error("PROJECTHELPFILEPATH_SizeOfHelpFile1 value not in range: {0}".format(PROJECTHELPFILEPATH_SizeOfHelpFile1))
  680 + logging.error(
  681 + "PROJECTHELPFILEPATH_SizeOfHelpFile1 value not in range: {0}".format(PROJECTHELPFILEPATH_SizeOfHelpFile1))
667 682 PROJECTHELPFILEPATH_HelpFile1 = dir_stream.read(PROJECTHELPFILEPATH_SizeOfHelpFile1)
668 683 PROJECTHELPFILEPATH_Reserved = struct.unpack("<H", dir_stream.read(2))[0]
669 684 check_value('PROJECTHELPFILEPATH_Reserved', 0x003D, PROJECTHELPFILEPATH_Reserved)
... ... @@ -702,7 +717,8 @@ def _extract_vba (ole, vba_root, project_path, dir_path):
702 717 check_value('PROJECTCONSTANTS_Id', 0x000C, PROJECTCONSTANTS_Id)
703 718 PROJECTCONSTANTS_SizeOfConstants = struct.unpack("<L", dir_stream.read(4))[0]
704 719 if PROJECTCONSTANTS_SizeOfConstants > 1015:
705   - logging.error("PROJECTCONSTANTS_SizeOfConstants value not in range: {0}".format(PROJECTCONSTANTS_SizeOfConstants))
  720 + logging.error(
  721 + "PROJECTCONSTANTS_SizeOfConstants value not in range: {0}".format(PROJECTCONSTANTS_SizeOfConstants))
706 722 PROJECTCONSTANTS_Constants = dir_stream.read(PROJECTCONSTANTS_SizeOfConstants)
707 723 PROJECTCONSTANTS_Reserved = struct.unpack("<H", dir_stream.read(2))[0]
708 724 check_value('PROJECTCONSTANTS_Reserved', 0x003C, PROJECTCONSTANTS_Reserved)
... ... @@ -740,23 +756,26 @@ def _extract_vba (ole, vba_root, project_path, dir_path):
740 756 if check == 0x002F:
741 757 # REFERENCECONTROL
742 758 REFERENCECONTROL_Id = check
743   - REFERENCECONTROL_SizeTwiddled = struct.unpack("<L", dir_stream.read(4))[0] # ignore
  759 + REFERENCECONTROL_SizeTwiddled = struct.unpack("<L", dir_stream.read(4))[0] # ignore
744 760 REFERENCECONTROL_SizeOfLibidTwiddled = struct.unpack("<L", dir_stream.read(4))[0]
745 761 REFERENCECONTROL_LibidTwiddled = dir_stream.read(REFERENCECONTROL_SizeOfLibidTwiddled)
746   - REFERENCECONTROL_Reserved1 = struct.unpack("<L", dir_stream.read(4))[0] # ignore
  762 + REFERENCECONTROL_Reserved1 = struct.unpack("<L", dir_stream.read(4))[0] # ignore
747 763 check_value('REFERENCECONTROL_Reserved1', 0x0000, REFERENCECONTROL_Reserved1)
748   - REFERENCECONTROL_Reserved2 = struct.unpack("<H", dir_stream.read(2))[0] # ignore
  764 + REFERENCECONTROL_Reserved2 = struct.unpack("<H", dir_stream.read(2))[0] # ignore
749 765 check_value('REFERENCECONTROL_Reserved2', 0x0000, REFERENCECONTROL_Reserved2)
750 766 # optional field
751 767 check2 = struct.unpack("<H", dir_stream.read(2))[0]
752 768 if check2 == 0x0016:
753 769 REFERENCECONTROL_NameRecordExtended_Id = check
754 770 REFERENCECONTROL_NameRecordExtended_SizeofName = struct.unpack("<L", dir_stream.read(4))[0]
755   - REFERENCECONTROL_NameRecordExtended_Name = dir_stream.read(REFERENCECONTROL_NameRecordExtended_SizeofName)
  771 + REFERENCECONTROL_NameRecordExtended_Name = dir_stream.read(
  772 + REFERENCECONTROL_NameRecordExtended_SizeofName)
756 773 REFERENCECONTROL_NameRecordExtended_Reserved = struct.unpack("<H", dir_stream.read(2))[0]
757   - check_value('REFERENCECONTROL_NameRecordExtended_Reserved', 0x003E, REFERENCECONTROL_NameRecordExtended_Reserved)
  774 + check_value('REFERENCECONTROL_NameRecordExtended_Reserved', 0x003E,
  775 + REFERENCECONTROL_NameRecordExtended_Reserved)
758 776 REFERENCECONTROL_NameRecordExtended_SizeOfNameUnicode = struct.unpack("<L", dir_stream.read(4))[0]
759   - REFERENCECONTROL_NameRecordExtended_NameUnicode = dir_stream.read(REFERENCECONTROL_NameRecordExtended_SizeOfNameUnicode)
  777 + REFERENCECONTROL_NameRecordExtended_NameUnicode = dir_stream.read(
  778 + REFERENCECONTROL_NameRecordExtended_SizeOfNameUnicode)
760 779 REFERENCECONTROL_Reserved3 = struct.unpack("<H", dir_stream.read(2))[0]
761 780 else:
762 781 REFERENCECONTROL_Reserved3 = check2
... ... @@ -798,7 +817,7 @@ def _extract_vba (ole, vba_root, project_path, dir_path):
798 817 logging.error('invalid or unknown check Id {0:04X}'.format(check))
799 818 sys.exit(0)
800 819  
801   - PROJECTMODULES_Id = check #struct.unpack("<H", dir_stream.read(2))[0]
  820 + PROJECTMODULES_Id = check #struct.unpack("<H", dir_stream.read(2))[0]
802 821 check_value('PROJECTMODULES_Id', 0x000F, PROJECTMODULES_Id)
803 822 PROJECTMODULES_Size = struct.unpack("<L", dir_stream.read(4))[0]
804 823 check_value('PROJECTMODULES_Size', 0x0002, PROJECTMODULES_Size)
... ... @@ -878,7 +897,7 @@ def _extract_vba (ole, vba_root, project_path, dir_path):
878 897 MODULEPRIVATE_Reserved = struct.unpack("<L", dir_stream.read(4))[0]
879 898 check_value('MODULEPRIVATE_Reserved', 0x0000, MODULEPRIVATE_Reserved)
880 899 section_id = struct.unpack("<H", dir_stream.read(2))[0]
881   - if section_id == 0x002B: # TERMINATOR
  900 + if section_id == 0x002B: # TERMINATOR
882 901 MODULE_Reserved = struct.unpack("<L", dir_stream.read(4))[0]
883 902 check_value('MODULE_Reserved', 0x0000, MODULE_Reserved)
884 903 section_id = None
... ... @@ -964,9 +983,9 @@ def detect_autoexec(vba_code, obfuscation=None):
964 983 for keyword in keywords:
965 984 #TODO: if keyword is already a compiled regex, use it as-is
966 985 # search using regex to detect word boundaries:
967   - if re.search(r'(?i)\b'+keyword+r'\b', vba_code):
968   - #if keyword.lower() in vba_code:
969   - results.append((keyword, description+obf_text))
  986 + if re.search(r'(?i)\b' + keyword + r'\b', vba_code):
  987 + #if keyword.lower() in vba_code:
  988 + results.append((keyword, description + obf_text))
970 989 return results
971 990  
972 991  
... ... @@ -988,9 +1007,9 @@ def detect_suspicious(vba_code, obfuscation=None):
988 1007 for description, keywords in SUSPICIOUS_KEYWORDS.items():
989 1008 for keyword in keywords:
990 1009 # search using regex to detect word boundaries:
991   - if re.search(r'(?i)\b'+keyword+r'\b', vba_code):
992   - #if keyword.lower() in vba_code:
993   - results.append((keyword, description+obf_text))
  1010 + if re.search(r'(?i)\b' + keyword + r'\b', vba_code):
  1011 + #if keyword.lower() in vba_code:
  1012 + results.append((keyword, description + obf_text))
994 1013 return results
995 1014  
996 1015  
... ... @@ -1011,7 +1030,7 @@ def detect_patterns(vba_code, obfuscation=None):
1011 1030 for match in pattern_re.finditer(vba_code):
1012 1031 value = match.group()
1013 1032 if value not in found:
1014   - results.append((pattern_type+obf_text, value))
  1033 + results.append((pattern_type + obf_text, value))
1015 1034 found.add(value)
1016 1035 return results
1017 1036  
... ... @@ -1070,6 +1089,7 @@ def detect_dridex_strings(vba_code):
1070 1089 :return: list of str tuples (encoded string, decoded string)
1071 1090 """
1072 1091 from thirdparty.DridexUrlDecoder.DridexUrlDecoder import DridexUrlDecode
  1092 +
1073 1093 results = []
1074 1094 found = set()
1075 1095 for match in re_dridex_string.finditer(vba_code):
... ... @@ -1088,7 +1108,7 @@ def detect_dridex_strings(vba_code):
1088 1108 return results
1089 1109  
1090 1110  
1091   -class VBA_Scanner (object):
  1111 +class VBA_Scanner(object):
1092 1112 """
1093 1113 Class to scan the source code of a VBA module to find obfuscated strings,
1094 1114 suspicious keywords, IOCs, auto-executable macros, etc.
... ... @@ -1125,35 +1145,35 @@ class VBA_Scanner (object):
1125 1145 if 'strreverse' in self.code.lower(): self.strReverse = True
1126 1146 # Then append the decoded strings to the VBA code, to detect obfuscated IOCs and keywords:
1127 1147 for encoded, decoded in self.hex_strings:
1128   - self.code_hex += '\n'+decoded
  1148 + self.code_hex += '\n' + decoded
1129 1149 # if the code contains "StrReverse", also append the hex strings in reverse order:
1130 1150 if self.strReverse:
1131 1151 # StrReverse after hex decoding:
1132   - self.code_hex_rev += '\n'+decoded[::-1]
  1152 + self.code_hex_rev += '\n' + decoded[::-1]
1133 1153 # StrReverse before hex decoding:
1134   - self.code_rev_hex += '\n'+binascii.unhexlify(encoded[::-1])
  1154 + self.code_rev_hex += '\n' + binascii.unhexlify(encoded[::-1])
1135 1155 #example: https://malwr.com/analysis/NmFlMGI4YTY1YzYyNDkwNTg1ZTBiZmY5OGI3YjlhYzU/
1136 1156 #TODO: also append the full code reversed if StrReverse? (risk of false positives?)
1137 1157 # Detect Base64-encoded strings
1138 1158 self.base64_strings = detect_base64_strings(self.code)
1139 1159 for encoded, decoded in self.base64_strings:
1140   - self.code_base64 += '\n'+decoded
  1160 + self.code_base64 += '\n' + decoded
1141 1161 # Detect Dridex-encoded strings
1142 1162 self.dridex_strings = detect_dridex_strings(self.code)
1143 1163 for encoded, decoded in self.dridex_strings:
1144   - self.code_dridex += '\n'+decoded
  1164 + self.code_dridex += '\n' + decoded
1145 1165 results = []
1146 1166 self.autoexec_keywords = []
1147 1167 self.suspicious_keywords = []
1148 1168 self.iocs = []
1149 1169  
1150 1170 for code, obfuscation in (
1151   - (self.code, None),
1152   - (self.code_hex, 'Hex'),
1153   - (self.code_hex_rev, 'Hex+StrReverse'),
1154   - (self.code_rev_hex, 'StrReverse+Hex'),
1155   - (self.code_base64, 'Base64'),
1156   - (self.code_dridex, 'Dridex'),
  1171 + (self.code, None),
  1172 + (self.code_hex, 'Hex'),
  1173 + (self.code_hex_rev, 'Hex+StrReverse'),
  1174 + (self.code_rev_hex, 'StrReverse+Hex'),
  1175 + (self.code_base64, 'Base64'),
  1176 + (self.code_dridex, 'Dridex'),
1157 1177 ):
1158 1178 self.autoexec_keywords += detect_autoexec(code, obfuscation)
1159 1179 self.suspicious_keywords += detect_suspicious(code, obfuscation)
... ... @@ -1162,13 +1182,13 @@ class VBA_Scanner (object):
1162 1182 # If hex-encoded strings were discovered, add an item to suspicious keywords:
1163 1183 if self.hex_strings:
1164 1184 self.suspicious_keywords.append(('Hex Strings',
1165   - 'Hex-encoded strings were detected, may be used to obfuscate strings (option --decode to see all)'))
  1185 + 'Hex-encoded strings were detected, may be used to obfuscate strings (option --decode to see all)'))
1166 1186 if self.base64_strings:
1167 1187 self.suspicious_keywords.append(('Base64 Strings',
1168   - 'Base64-encoded strings were detected, may be used to obfuscate strings (option --decode to see all)'))
  1188 + 'Base64-encoded strings were detected, may be used to obfuscate strings (option --decode to see all)'))
1169 1189 if self.dridex_strings:
1170 1190 self.suspicious_keywords.append(('Dridex Strings',
1171   - 'Dridex-encoded strings were detected, may be used to obfuscate strings (option --decode to see all)'))
  1191 + 'Dridex-encoded strings were detected, may be used to obfuscate strings (option --decode to see all)'))
1172 1192 for keyword, description in self.autoexec_keywords:
1173 1193 results.append(('AutoExec', keyword, description))
1174 1194 for keyword, description in self.suspicious_keywords:
... ... @@ -1195,9 +1215,8 @@ class VBA_Scanner (object):
1195 1215 """
1196 1216 self.scan()
1197 1217 return (len(self.autoexec_keywords), len(self.suspicious_keywords),
1198   - len(self.iocs), len(self.hex_strings), len(self.base64_strings),
1199   - len(self.dridex_strings))
1200   -
  1218 + len(self.iocs), len(self.hex_strings), len(self.base64_strings),
  1219 + len(self.dridex_strings))
1201 1220  
1202 1221  
1203 1222 def scan_vba(vba_code, include_decoded_strings):
... ... @@ -1265,11 +1284,12 @@ class VBA_Parser(object):
1265 1284 # self.filename = '<file-like object>'
1266 1285 if olefile.isOleFile(_file):
1267 1286 # This looks like an OLE file
1268   - logging.info('Parsing OLE file %s' % self.filename)
  1287 + logging.info('Opening OLE file %s' % self.filename)
1269 1288 # Open and parse the OLE file, using unicode for path names:
1270   - self.ole_file = olefile.OleFileIO(_file, path_encoding=None)
1271 1289 self.type = TYPE_OLE
1272   - #TODO: raise TypeError if this is a Powerpoint 97 file, since VBA macros cannot be detected yet
  1290 + # TODO: handle OLE parsing exceptions
  1291 + self.ole_file = olefile.OleFileIO(_file, path_encoding=None)
  1292 + # TODO: raise TypeError if this is a Powerpoint 97 file, since VBA macros cannot be detected yet
1273 1293 elif zipfile.is_zipfile(_file):
1274 1294 # This looks like a zip file, need to look for vbaProject.bin inside
1275 1295 # It can be any OLE file inside the archive
... ... @@ -1279,7 +1299,7 @@ class VBA_Parser(object):
1279 1299 self.type = TYPE_OpenXML
1280 1300 z = zipfile.ZipFile(_file)
1281 1301 #TODO: check if this is actually an OpenXML file
1282   - #TODO: if the zip file is encrypted, suggest to use the -z option, or try '-z infected' automatically?
  1302 + #TODO: if the zip file is encrypted, suggest to use the -z option, or try '-z infected' automatically
1283 1303 # check each file within the zip if it is an OLE file, by reading its magic:
1284 1304 for subfile in z.namelist():
1285 1305 magic = z.open(subfile).read(len(olefile.MAGIC))
... ... @@ -1297,64 +1317,90 @@ class VBA_Parser(object):
1297 1317 # or a plain text file containing VBA code
1298 1318 if data is None:
1299 1319 data = open(filename, 'rb').read()
  1320 + # store a lowercase version for some tests:
  1321 + data_lowercase = data.lower()
1300 1322 # TODO: move each format parser to a separate method
1301 1323 # check if it is a Word 2003 XML file (WordProcessingML): must contain the namespace
1302 1324 if 'http://schemas.microsoft.com/office/word/2003/wordml' in data:
1303 1325 logging.info('Opening Word 2003 XML file %s' % self.filename)
1304   - self.type = TYPE_Word2003_XML
1305   - # parse the XML content
1306   - et = ET.fromstring(data)
1307   - # find all the binData elements:
1308   - for bindata in et.getiterator(TAG_BINDATA):
1309   - # the binData content is an OLE container for the VBA project, compressed
1310   - # using the ActiveMime/MSO format (zlib-compressed), and Base64 encoded.
1311   - # get the filename:
1312   - fname = bindata.get(ATTR_NAME, 'noname.mso')
1313   - # decode the base64 activemime
1314   - activemime = binascii.a2b_base64(bindata.text)
1315   - # decompress the zlib data starting at offset 0x32, which is the OLE container:
1316   - ole_data = zlib.decompress(activemime[0x32:])
1317   - try:
1318   - self.ole_subfiles.append(VBA_Parser(filename=fname, data=ole_data))
1319   - except:
1320   - logging.debug('%s is not a valid OLE file' % fname)
1321   - continue
1322   - # check if it is a Word 2003 XML file (WordProcessingML): must contain the namespace
1323   - # TODO: check if Word accepts data before the MIME header, if is case-sensitive, etc.
1324   - elif data.lower().startswith('mime-version:'):
1325   - logging.info('Opening Word MHTML file %s' % self.filename)
1326   - self.type = TYPE_MHTML
1327   - # parse the MIME content
1328   - mhtml = email.message_from_string(data)
1329   - # find all the attached files:
1330   - for part in mhtml.walk():
1331   - content_type = part.get_content_type() # always returns a value
1332   - fname = part.get_filename(None) # returns None if it fails
1333   - logging.debug('MHTML part: filename=%r, content-type=%r' % (fname, content_type))
1334   - part_data = part.get_payload(decode=True)
1335   - # VBA macros are stored in a binary file named "editdata.mso".
1336   - # the data content is an OLE container for the VBA project, compressed
1337   - # using the ActiveMime/MSO format (zlib-compressed), and Base64 encoded.
1338   - # decompress the zlib data starting at offset 0x32, which is the OLE container:
1339   - try:
1340   - ole_data = zlib.decompress(part_data[0x32:])
1341   - except:
1342   - logging.debug('%s is not an ActiveMime container' % fname)
1343   - continue
1344   - try:
1345   - self.ole_subfiles.append(VBA_Parser(filename=fname, data=ole_data))
1346   - except:
1347   - logging.debug('%s is not a valid OLE file' % fname)
1348   - continue
1349   - #TODO: handle exceptions
1350   - #TODO: Excel 2003 XML
1351   - #TODO: plain text VBA file
1352   - else:
1353   - msg = '%s is not an OLE nor an OpenXML file, cannot extract VBA Macros.' % self.filename
1354   - logging.error(msg)
1355   - raise TypeError(msg)
1356   -
1357   - def find_vba_projects (self):
  1326 + try:
  1327 + # parse the XML content
  1328 + # TODO: handle XML parsing exceptions
  1329 + et = ET.fromstring(data)
  1330 + # set type only if parsing succeeds
  1331 + self.type = TYPE_Word2003_XML
  1332 + # find all the binData elements:
  1333 + for bindata in et.getiterator(TAG_BINDATA):
  1334 + # the binData content is an OLE container for the VBA project, compressed
  1335 + # using the ActiveMime/MSO format (zlib-compressed), and Base64 encoded.
  1336 + # get the filename:
  1337 + fname = bindata.get(ATTR_NAME, 'noname.mso')
  1338 + # decode the base64 activemime
  1339 + activemime = binascii.a2b_base64(bindata.text)
  1340 + # decompress the zlib data starting at offset 0x32, which is the OLE container:
  1341 + # TODO: handle different offsets => separate function
  1342 + ole_data = zlib.decompress(activemime[0x32:])
  1343 + try:
  1344 + self.ole_subfiles.append(VBA_Parser(filename=fname, data=ole_data))
  1345 + except:
  1346 + logging.debug('%s is not a valid OLE file' % fname)
  1347 + continue
  1348 + except:
  1349 + logging.exception('Failed XML parsing for file %r' % self.filename)
  1350 + pass
  1351 + # check if it is a MHT file (MIME HTML, Word or Excel saved as "Single File Web Page"):
  1352 + # According to my tests, these files usually start with "MIME-Version: 1.0" on the 1st line
  1353 + # BUT Word accepts a blank line or other MIME headers inserted before,
  1354 + # and even whitespaces in between "MIME", "-", "Version" and ":". The version number is ignored.
  1355 + # And the line is case insensitive.
  1356 + # so we'll just check the presence of mime, version and multipart anywhere:
  1357 + if self.type is None and 'mime' in data_lowercase and 'version' in data_lowercase and 'multipart' in data_lowercase:
  1358 + logging.info('Opening MHTML file %s' % self.filename)
  1359 + try:
  1360 + # parse the MIME content
  1361 + # remove any leading whitespace or newline (workaround for issue in email package)
  1362 + stripped_data = data.lstrip('\r\n\t ')
  1363 + mhtml = email.message_from_string(stripped_data)
  1364 + self.type = TYPE_MHTML
  1365 + # find all the attached files:
  1366 + for part in mhtml.walk():
  1367 + content_type = part.get_content_type() # always returns a value
  1368 + fname = part.get_filename(None) # returns None if it fails
  1369 + logging.debug('MHTML part: filename=%r, content-type=%r' % (fname, content_type))
  1370 + part_data = part.get_payload(decode=True)
  1371 + # VBA macros are stored in a binary file named "editdata.mso".
  1372 + # the data content is an OLE container for the VBA project, compressed
  1373 + # using the ActiveMime/MSO format (zlib-compressed), and Base64 encoded.
  1374 + # decompress the zlib data starting at offset 0x32, which is the OLE container:
  1375 + # check ActiveMime header:
  1376 + if isinstance(part_data, str) and part_data.startswith(MSO_ACTIVEMIME_HEADER):
  1377 + logging.debug('Found ActiveMime header, decompressing MSO container')
  1378 + try:
  1379 + ole_data = zlib.decompress(part_data[0x32:])
  1380 + try:
  1381 + # TODO: check if it is actually an OLE file
  1382 + # TODO: get the MSO filename from content_location?
  1383 + self.ole_subfiles.append(VBA_Parser(filename=fname, data=ole_data))
  1384 + except:
  1385 + logging.debug('%s is not a valid OLE file' % fname)
  1386 + except:
  1387 + logging.error('Failed decompressing an MSO container in %r - %s'
  1388 + % (fname, MSG_OLEVBA_ISSUES))
  1389 + # TODO: bug here - need to split in smaller functions/classes?
  1390 + except:
  1391 + logging.exception('Failed MIME parsing for file %r - %s'
  1392 + % (self.filename, MSG_OLEVBA_ISSUES))
  1393 + pass
  1394 +
  1395 + #TODO: handle exceptions
  1396 + #TODO: Excel 2003 XML
  1397 + #TODO: plain text VBA file
  1398 + if self.type is None:
  1399 + msg = '%s is not a supported file type, cannot extract VBA Macros.' % self.filename
  1400 + logging.error(msg)
  1401 + raise TypeError(msg)
  1402 +
  1403 + def find_vba_projects(self):
1358 1404 """
1359 1405 Finds all the VBA projects stored in an OLE file.
1360 1406  
... ... @@ -1465,7 +1511,7 @@ class VBA_Parser(object):
1465 1511 return True
1466 1512  
1467 1513  
1468   - def extract_macros (self):
  1514 + def extract_macros(self):
1469 1515 """
1470 1516 Extract and decompress source code for each VBA macro found in the file
1471 1517  
... ... @@ -1482,7 +1528,8 @@ class VBA_Parser(object):
1482 1528 self.find_vba_projects()
1483 1529 for vba_root, project_path, dir_path in self.vba_projects:
1484 1530 # extract all VBA macros from that VBA root storage:
1485   - for stream_path, vba_filename, vba_code in _extract_vba(self.ole_file, vba_root, project_path, dir_path):
  1531 + for stream_path, vba_filename, vba_code in _extract_vba(self.ole_file, vba_root, project_path,
  1532 + dir_path):
1486 1533 yield (self.filename, stream_path, vba_filename, vba_code)
1487 1534  
1488 1535  
... ... @@ -1520,8 +1567,7 @@ def print_analysis(vba_code, show_decoded_strings=False):
1520 1567 print 'No suspicious keyword or IOC found.'
1521 1568  
1522 1569  
1523   -
1524   -def process_file (container, filename, data, show_decoded_strings=False):
  1570 +def process_file(container, filename, data, show_decoded_strings=False):
1525 1571 """
1526 1572 Process a single file
1527 1573  
... ... @@ -1536,7 +1582,7 @@ def process_file (container, filename, data, show_decoded_strings=False):
1536 1582 display_filename = '%s in %s' % (filename, container)
1537 1583 else:
1538 1584 display_filename = filename
1539   - print '='*79
  1585 + print '=' * 79
1540 1586 print 'FILE:', display_filename
1541 1587 try:
1542 1588 #TODO: handle olefile errors, when an OLE file is malformed
... ... @@ -1548,22 +1594,22 @@ def process_file (container, filename, data, show_decoded_strings=False):
1548 1594 # hide attribute lines:
1549 1595 #TODO: option to disable attribute filtering
1550 1596 vba_code_filtered = filter_vba(vba_code)
1551   - print '-'*79
  1597 + print '-' * 79
1552 1598 print 'VBA MACRO %s ' % vba_filename
1553 1599 print 'in file: %s - OLE stream: %s' % (subfilename, repr(stream_path))
1554   - print '- '*39
  1600 + print '- ' * 39
1555 1601 # detect empty macros:
1556 1602 if vba_code_filtered.strip() == '':
1557 1603 print '(empty macro)'
1558 1604 else:
1559 1605 print vba_code_filtered
1560   - print '- '*39
  1606 + print '- ' * 39
1561 1607 print 'ANALYSIS:'
1562 1608 # analyse the whole code, filtered to avoid false positives:
1563 1609 print_analysis(vba_code_filtered, show_decoded_strings)
1564 1610 else:
1565 1611 print 'No VBA macros found.'
1566   - except: #TypeError:
  1612 + except: #TypeError:
1567 1613 #raise
1568 1614 #TODO: print more info if debug mode
1569 1615 #print sys.exc_value
... ... @@ -1572,7 +1618,7 @@ def process_file (container, filename, data, show_decoded_strings=False):
1572 1618 print ''
1573 1619  
1574 1620  
1575   -def process_file_triage (container, filename, data):
  1621 +def process_file_triage(container, filename, data):
1576 1622 """
1577 1623 Process a single file
1578 1624  
... ... @@ -1624,7 +1670,7 @@ def process_file_triage (container, filename, data):
1624 1670 if nb_base64strings: base64obf = 'B'
1625 1671 if nb_dridexstrings: dridex = 'D'
1626 1672 flags += '%s%s%s%s%s%s%s' % (macros, autoexec, suspicious, iocs, hexstrings,
1627   - base64obf, dridex)
  1673 + base64obf, dridex)
1628 1674  
1629 1675 # macros = autoexec = suspicious = iocs = hexstrings = 'no'
1630 1676 # if nb_macros: macros = 'YES:%d' % nb_macros
... ... @@ -1663,9 +1709,11 @@ def process_file_triage (container, filename, data):
1663 1709 # t.add_row((filename, ftype, macros, autoexec, suspicious, iocs, hexstrings))
1664 1710 # print t
1665 1711  
  1712 +
1666 1713 def main_triage_quick():
1667 1714 pass
1668 1715  
  1716 +
1669 1717 #=== MAIN =====================================================================
1670 1718  
1671 1719 def main():
... ... @@ -1679,19 +1727,19 @@ def main():
1679 1727 # parser.add_option('-c', '--csv', dest='csv',
1680 1728 # help='export results to a CSV file')
1681 1729 parser.add_option("-r", action="store_true", dest="recursive",
1682   - help='find files recursively in subdirectories.')
  1730 + help='find files recursively in subdirectories.')
1683 1731 parser.add_option("-z", "--zip", dest='zip_password', type='str', default=None,
1684   - help='if the file is a zip archive, open first file from it, using the provided password (requires Python 2.6+)')
  1732 + help='if the file is a zip archive, open first file from it, using the provided password (requires Python 2.6+)')
1685 1733 parser.add_option("-f", "--zipfname", dest='zip_fname', type='str', default='*',
1686   - help='if the file is a zip archive, file(s) to be opened within the zip. Wildcards * and ? are supported. (default:*)')
  1734 + help='if the file is a zip archive, file(s) to be opened within the zip. Wildcards * and ? are supported. (default:*)')
1687 1735 parser.add_option("-t", action="store_true", dest="triage_mode",
1688   - help='triage mode, display results as a summary table (default for multiple files)')
  1736 + help='triage mode, display results as a summary table (default for multiple files)')
1689 1737 parser.add_option("-d", action="store_true", dest="detailed_mode",
1690   - help='detailed mode, display full results (default for single file)')
  1738 + help='detailed mode, display full results (default for single file)')
1691 1739 parser.add_option("-i", "--input", dest='input', type='str', default=None,
1692   - help='input file containing VBA source code to be analyzed (no parsing)')
  1740 + help='input file containing VBA source code to be analyzed (no parsing)')
1693 1741 parser.add_option("--decode", action="store_true", dest="show_decoded_strings",
1694   - help='display all the obfuscated strings with their decoded content (Hex, Base64, StrReverse, Dridex).')
  1742 + help='display all the obfuscated strings with their decoded content (Hex, Base64, StrReverse, Dridex).')
1695 1743  
1696 1744 (options, args) = parser.parse_args()
1697 1745  
... ... @@ -1705,9 +1753,9 @@ def main():
1705 1753 print 'olevba %s - http://decalage.info/python/oletools' % __version__
1706 1754  
1707 1755 # TODO: option to set logging level, none by default
1708   - logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.WARNING) #INFO)
  1756 + logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.DEBUG) #.WARNING) #INFO)
1709 1757 # For now, all logging is disabled:
1710   - logging.disable(logging.CRITICAL)
  1758 + #logging.disable(logging.CRITICAL)
1711 1759  
1712 1760 if options.input:
1713 1761 # input file provided with VBA source code to be analyzed directly:
... ... @@ -1720,12 +1768,12 @@ def main():
1720 1768 # print '%-8s %-7s %-7s %-7s %-7s %-7s' % ('-'*8, '-'*7, '-'*7, '-'*7, '-'*7, '-'*7)
1721 1769 if not options.detailed_mode or options.triage_mode:
1722 1770 print '%-11s %-65s' % ('Flags', 'Filename')
1723   - print '%-11s %-65s' % ('-'*11, '-'*65)
  1771 + print '%-11s %-65s' % ('-' * 11, '-' * 65)
1724 1772 previous_container = None
1725 1773 count = 0
1726 1774 container = filename = data = None
1727 1775 for container, filename, data in xglob.iter_files(args, recursive=options.recursive,
1728   - zip_password=options.zip_password, zip_fname=options.zip_fname):
  1776 + zip_password=options.zip_password, zip_fname=options.zip_fname):
1729 1777 # ignore directory names stored in zip files:
1730 1778 if container and filename.endswith('/'):
1731 1779 continue
... ... @@ -1749,7 +1797,8 @@ def main():
1749 1797 #TODO: avoid doing the analysis twice by storing results
1750 1798 process_file(container, filename, data, show_decoded_strings=options.show_decoded_strings)
1751 1799  
  1800 +
1752 1801 if __name__ == '__main__':
1753 1802 main()
1754 1803  
1755   -# This was coded while listening to "Dust" from I Love You But I've Chosen Darkness
1756 1804 \ No newline at end of file
  1805 + # This was coded while listening to "Dust" from I Love You But I've Chosen Darkness
1757 1806 \ No newline at end of file
... ...