Commit 4991f1ae8f2a6a42998ab3e5fd691bbbb1c08db9

Authored by Philippe Lagadec
1 parent 41d98ad5

olevba: added Base64 function decoding to VBA Parser

Showing 1 changed file with 25 additions and 4 deletions
oletools/olevba.py
... ... @@ -144,11 +144,13 @@ https://github.com/unixfreak0037/officeparser
144 144 # - fix VBA_Scanner.scan to return raw strings, not repr()
145 145 # 2015-07-09 v0.33 PL: - removed usage of sys.stderr which causes issues
146 146 # 2015-07-12 PL: - added Hex function decoding to VBA Parser
  147 +# 2015-07-13 PL: - added Base64 function decoding to VBA Parser
147 148  
148 149 __version__ = '0.33'
149 150  
150 151 #------------------------------------------------------------------------------
151 152 # TODO:
  153 +# + dedup deobfuscation results
152 154 # + option --fast to disable VBA expressions parsing
153 155 # + do not use logging, but a provided logger (null logger by default)
154 156 # + setup logging (common with other oletools)
... ... @@ -419,8 +421,9 @@ re_hex_string = re.compile(r'(?:[0-9A-Fa-f]{2}){4,}')
419 421 # regex to detect strings encoded in base64
420 422 #re_base64_string = re.compile(r'"(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)?"')
421 423 # better version from balbuzard, less false positives:
422   -re_base64_string = re.compile(
423   - r'"(?:[A-Za-z0-9+/]{4}){1,}(?:[A-Za-z0-9+/]{2}[AEIMQUYcgkosw048]=|[A-Za-z0-9+/][AQgw]==)?"')
  424 +# (plain version without double quotes, also used below in quoted_base64_string)
  425 +BASE64_RE = r'(?:[A-Za-z0-9+/]{4}){1,}(?:[A-Za-z0-9+/]{2}[AEIMQUYcgkosw048]=|[A-Za-z0-9+/][AQgw]==)?'
  426 +re_base64_string = re.compile('"' + BASE64_RE + '"')
424 427 # white list of common strings matching the base64 regex, but which are not base64 strings (all lowercase):
425 428 BASE64_WHITELIST = set(['thisdocument', 'thisworkbook', 'test', 'temp', 'http', 'open', 'exit'])
426 429  
... ... @@ -438,6 +441,9 @@ re_nothex_check = re.compile(r'[G-Zg-z]')
438 441 # https://msdn.microsoft.com/en-us/library/dd361851.aspx
439 442 # - pyparsing: http://pyparsing.wikispaces.com/
440 443  
  444 +# TODO: set whitespaces according to VBA
  445 +# TODO: merge extended lines before parsing
  446 +
441 447 # VBA identifier chars (from MS-VBAL 3.3.5)
442 448 vba_identifier_chars = alphanums + '_'
443 449  
... ... @@ -557,6 +563,7 @@ latin_identifier = Word(initChars=alphas, bodyChars=alphanums + '_')
557 563 # --- HEX FUNCTION -----------------------------------------------------------
558 564  
559 565 # match any custom function name with a hex string as argument:
  566 +# TODO: accept vba_expr_str_item as argument, check if it is a hex or base64 string at runtime
560 567  
561 568 # quoted string of at least two hexadecimal numbers of two digits:
562 569 quoted_hex_string = Suppress('"') + Combine(Word(hexnums, exact=2) * (2, None)) + Suppress('"')
... ... @@ -564,7 +571,21 @@ quoted_hex_string.setParseAction(lambda t: str(t[0]))
564 571  
565 572 hex_function_call = Suppress(latin_identifier) + Suppress('(') + \
566 573 quoted_hex_string('hex_string') + Suppress(')')
567   -hex_function_call.setParseAction(lambda t: binascii.a2b_hex(t.hex_string))
  574 +hex_function_call.setParseAction(lambda t: VbaExpressionString(binascii.a2b_hex(t.hex_string)))
  575 +
  576 +
  577 +# --- BASE64 FUNCTION -----------------------------------------------------------
  578 +
  579 +# match any custom function name with a Base64 string as argument:
  580 +# TODO: accept vba_expr_str_item as argument, check if it is a hex or base64 string at runtime
  581 +
  582 +# quoted Base64 string (one or more groups of four Base64 characters, optionally followed by a padded final group):
  583 +quoted_base64_string = Suppress('"') + Regex(BASE64_RE) + Suppress('"')
  584 +quoted_base64_string.setParseAction(lambda t: str(t[0]))
  585 +
  586 +base64_function_call = Suppress(latin_identifier) + Suppress('(') + \
  587 + quoted_base64_string('base64_string') + Suppress(')')
  588 +base64_function_call.setParseAction(lambda t: VbaExpressionString(binascii.a2b_base64(t.base64_string)))
568 589  
569 590  
570 591 # ---STRING EXPRESSION -------------------------------------------------------
... ... @@ -579,7 +600,7 @@ def concat_strings_list(tokens):
579 600 return VbaExpressionString(''.join(strings))
580 601  
581 602  
582   -vba_expr_str_item = (vba_chr | strReverse | environ | quoted_string | hex_function_call)
  603 +vba_expr_str_item = (vba_chr | strReverse | environ | quoted_string | hex_function_call | base64_function_call)
583 604  
584 605 vba_expr_str <<= infixNotation(vba_expr_str_item,
585 606 [
... ...