Commit 4991f1ae8f2a6a42998ab3e5fd691bbbb1c08db9

Authored by Philippe Lagadec
1 parent 41d98ad5

olevba: added Base64 function decoding to VBA Parser

Showing 1 changed file with 25 additions and 4 deletions
oletools/olevba.py
@@ -144,11 +144,13 @@ https://github.com/unixfreak0037/officeparser @@ -144,11 +144,13 @@ https://github.com/unixfreak0037/officeparser
144 # - fix VBA_Scanner.scan to return raw strings, not repr() 144 # - fix VBA_Scanner.scan to return raw strings, not repr()
145 # 2015-07-09 v0.33 PL: - removed usage of sys.stderr which causes issues 145 # 2015-07-09 v0.33 PL: - removed usage of sys.stderr which causes issues
146 # 2015-07-12 PL: - added Hex function decoding to VBA Parser 146 # 2015-07-12 PL: - added Hex function decoding to VBA Parser
  147 +# 2015-07-13 PL: - added Base64 function decoding to VBA Parser
147 148
148 __version__ = '0.33' 149 __version__ = '0.33'
149 150
150 #------------------------------------------------------------------------------ 151 #------------------------------------------------------------------------------
151 # TODO: 152 # TODO:
  153 +# + dedup deobfuscation results
152 # + option --fast to disable VBA expressions parsing 154 # + option --fast to disable VBA expressions parsing
153 # + do not use logging, but a provided logger (null logger by default) 155 # + do not use logging, but a provided logger (null logger by default)
154 # + setup logging (common with other oletools) 156 # + setup logging (common with other oletools)
@@ -419,8 +421,9 @@ re_hex_string = re.compile(r'(?:[0-9A-Fa-f]{2}){4,}') @@ -419,8 +421,9 @@ re_hex_string = re.compile(r'(?:[0-9A-Fa-f]{2}){4,}')
419 # regex to detect strings encoded in base64 421 # regex to detect strings encoded in base64
420 #re_base64_string = re.compile(r'"(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)?"') 422 #re_base64_string = re.compile(r'"(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)?"')
421 # better version from balbuzard, fewer false positives: 423 # better version from balbuzard, fewer false positives:
422 -re_base64_string = re.compile(  
423 - r'"(?:[A-Za-z0-9+/]{4}){1,}(?:[A-Za-z0-9+/]{2}[AEIMQUYcgkosw048]=|[A-Za-z0-9+/][AQgw]==)?"') 424 +# (plain version without double quotes, used also below in quoted_base64_string)
  425 +BASE64_RE = r'(?:[A-Za-z0-9+/]{4}){1,}(?:[A-Za-z0-9+/]{2}[AEIMQUYcgkosw048]=|[A-Za-z0-9+/][AQgw]==)?'
  426 +re_base64_string = re.compile('"' + BASE64_RE + '"')
424 # white list of common strings matching the base64 regex, but which are not base64 strings (all lowercase): 427 # white list of common strings matching the base64 regex, but which are not base64 strings (all lowercase):
425 BASE64_WHITELIST = set(['thisdocument', 'thisworkbook', 'test', 'temp', 'http', 'open', 'exit']) 428 BASE64_WHITELIST = set(['thisdocument', 'thisworkbook', 'test', 'temp', 'http', 'open', 'exit'])
426 429
@@ -438,6 +441,9 @@ re_nothex_check = re.compile(r'[G-Zg-z]') @@ -438,6 +441,9 @@ re_nothex_check = re.compile(r'[G-Zg-z]')
438 # https://msdn.microsoft.com/en-us/library/dd361851.aspx 441 # https://msdn.microsoft.com/en-us/library/dd361851.aspx
439 # - pyparsing: http://pyparsing.wikispaces.com/ 442 # - pyparsing: http://pyparsing.wikispaces.com/
440 443
  444 +# TODO: set whitespaces according to VBA
  445 +# TODO: merge extended lines before parsing
  446 +
441 # VBA identifier chars (from MS-VBAL 3.3.5) 447 # VBA identifier chars (from MS-VBAL 3.3.5)
442 vba_identifier_chars = alphanums + '_' 448 vba_identifier_chars = alphanums + '_'
443 449
@@ -557,6 +563,7 @@ latin_identifier = Word(initChars=alphas, bodyChars=alphanums + '_') @@ -557,6 +563,7 @@ latin_identifier = Word(initChars=alphas, bodyChars=alphanums + '_')
557 # --- HEX FUNCTION ----------------------------------------------------------- 563 # --- HEX FUNCTION -----------------------------------------------------------
558 564
559 # match any custom function name with a hex string as argument: 565 # match any custom function name with a hex string as argument:
  566 +# TODO: accept vba_expr_str_item as argument, check if it is a hex or base64 string at runtime
560 567
561 # quoted string of at least two hexadecimal numbers of two digits: 568 # quoted string of at least two hexadecimal numbers of two digits:
562 quoted_hex_string = Suppress('"') + Combine(Word(hexnums, exact=2) * (2, None)) + Suppress('"') 569 quoted_hex_string = Suppress('"') + Combine(Word(hexnums, exact=2) * (2, None)) + Suppress('"')
@@ -564,7 +571,21 @@ quoted_hex_string.setParseAction(lambda t: str(t[0])) @@ -564,7 +571,21 @@ quoted_hex_string.setParseAction(lambda t: str(t[0]))
564 571
565 hex_function_call = Suppress(latin_identifier) + Suppress('(') + \ 572 hex_function_call = Suppress(latin_identifier) + Suppress('(') + \
566 quoted_hex_string('hex_string') + Suppress(')') 573 quoted_hex_string('hex_string') + Suppress(')')
567 -hex_function_call.setParseAction(lambda t: binascii.a2b_hex(t.hex_string)) 574 +hex_function_call.setParseAction(lambda t: VbaExpressionString(binascii.a2b_hex(t.hex_string)))
  575 +
  576 +
  577 +# --- BASE64 FUNCTION -----------------------------------------------------------
  578 +
  579 +# match any custom function name with a Base64 string as argument:
  580 +# TODO: accept vba_expr_str_item as argument, check if it is a hex or base64 string at runtime
  581 +
  582 +# quoted Base64 string: one or more groups of four Base64 characters, optionally followed by a '='-padded group (see BASE64_RE):
  583 +quoted_base64_string = Suppress('"') + Regex(BASE64_RE) + Suppress('"')
  584 +quoted_base64_string.setParseAction(lambda t: str(t[0]))
  585 +
  586 +base64_function_call = Suppress(latin_identifier) + Suppress('(') + \
  587 + quoted_base64_string('base64_string') + Suppress(')')
  588 +base64_function_call.setParseAction(lambda t: VbaExpressionString(binascii.a2b_base64(t.base64_string)))
568 589
569 590
570 # ---STRING EXPRESSION ------------------------------------------------------- 591 # ---STRING EXPRESSION -------------------------------------------------------
@@ -579,7 +600,7 @@ def concat_strings_list(tokens): @@ -579,7 +600,7 @@ def concat_strings_list(tokens):
579 return VbaExpressionString(''.join(strings)) 600 return VbaExpressionString(''.join(strings))
580 601
581 602
582 -vba_expr_str_item = (vba_chr | strReverse | environ | quoted_string | hex_function_call) 603 +vba_expr_str_item = (vba_chr | strReverse | environ | quoted_string | hex_function_call | base64_function_call)
583 604
584 vba_expr_str <<= infixNotation(vba_expr_str_item, 605 vba_expr_str <<= infixNotation(vba_expr_str_item,
585 [ 606 [