Commit 4991f1ae8f2a6a42998ab3e5fd691bbbb1c08db9
1 parent
41d98ad5
olevba: added Base64 function decoding to VBA Parser
Showing
1 changed file
with
25 additions
and
4 deletions
oletools/olevba.py
| @@ -144,11 +144,13 @@ https://github.com/unixfreak0037/officeparser | @@ -144,11 +144,13 @@ https://github.com/unixfreak0037/officeparser | ||
| 144 | # - fix VBA_Scanner.scan to return raw strings, not repr() | 144 | # - fix VBA_Scanner.scan to return raw strings, not repr() |
| 145 | # 2015-07-09 v0.33 PL: - removed usage of sys.stderr which causes issues | 145 | # 2015-07-09 v0.33 PL: - removed usage of sys.stderr which causes issues |
| 146 | # 2015-07-12 PL: - added Hex function decoding to VBA Parser | 146 | # 2015-07-12 PL: - added Hex function decoding to VBA Parser |
| 147 | +# 2015-07-13 PL: - added Base64 function decoding to VBA Parser | ||
| 147 | 148 | ||
| 148 | __version__ = '0.33' | 149 | __version__ = '0.33' |
| 149 | 150 | ||
| 150 | #------------------------------------------------------------------------------ | 151 | #------------------------------------------------------------------------------ |
| 151 | # TODO: | 152 | # TODO: |
| 153 | +# + dedup deobfuscation results | ||
| 152 | # + option --fast to disable VBA expressions parsing | 154 | # + option --fast to disable VBA expressions parsing |
| 153 | # + do not use logging, but a provided logger (null logger by default) | 155 | # + do not use logging, but a provided logger (null logger by default) |
| 154 | # + setup logging (common with other oletools) | 156 | # + setup logging (common with other oletools) |
| @@ -419,8 +421,9 @@ re_hex_string = re.compile(r'(?:[0-9A-Fa-f]{2}){4,}') | @@ -419,8 +421,9 @@ re_hex_string = re.compile(r'(?:[0-9A-Fa-f]{2}){4,}') | ||
| 419 | # regex to detect strings encoded in base64 | 421 | # regex to detect strings encoded in base64 |
| 420 | #re_base64_string = re.compile(r'"(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)?"') | 422 | #re_base64_string = re.compile(r'"(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)?"') |
| 421 | # better version from balbuzard, less false positives: | 423 | # better version from balbuzard, less false positives: |
| 422 | -re_base64_string = re.compile( | ||
| 423 | - r'"(?:[A-Za-z0-9+/]{4}){1,}(?:[A-Za-z0-9+/]{2}[AEIMQUYcgkosw048]=|[A-Za-z0-9+/][AQgw]==)?"') | 424 | +# (plain version without double quotes, used also below in quoted_base64_string) |
| 425 | +BASE64_RE = r'(?:[A-Za-z0-9+/]{4}){1,}(?:[A-Za-z0-9+/]{2}[AEIMQUYcgkosw048]=|[A-Za-z0-9+/][AQgw]==)?' | ||
| 426 | +re_base64_string = re.compile('"' + BASE64_RE + '"') | ||
| 424 | # white list of common strings matching the base64 regex, but which are not base64 strings (all lowercase): | 427 | # white list of common strings matching the base64 regex, but which are not base64 strings (all lowercase): |
| 425 | BASE64_WHITELIST = set(['thisdocument', 'thisworkbook', 'test', 'temp', 'http', 'open', 'exit']) | 428 | BASE64_WHITELIST = set(['thisdocument', 'thisworkbook', 'test', 'temp', 'http', 'open', 'exit']) |
| 426 | 429 | ||
| @@ -438,6 +441,9 @@ re_nothex_check = re.compile(r'[G-Zg-z]') | @@ -438,6 +441,9 @@ re_nothex_check = re.compile(r'[G-Zg-z]') | ||
| 438 | # https://msdn.microsoft.com/en-us/library/dd361851.aspx | 441 | # https://msdn.microsoft.com/en-us/library/dd361851.aspx |
| 439 | # - pyparsing: http://pyparsing.wikispaces.com/ | 442 | # - pyparsing: http://pyparsing.wikispaces.com/ |
| 440 | 443 | ||
| 444 | +# TODO: set whitespace characters according to VBA | ||
| 445 | +# TODO: merge extended lines before parsing | ||
| 446 | + | ||
| 441 | # VBA identifier chars (from MS-VBAL 3.3.5) | 447 | # VBA identifier chars (from MS-VBAL 3.3.5) |
| 442 | vba_identifier_chars = alphanums + '_' | 448 | vba_identifier_chars = alphanums + '_' |
| 443 | 449 | ||
| @@ -557,6 +563,7 @@ latin_identifier = Word(initChars=alphas, bodyChars=alphanums + '_') | @@ -557,6 +563,7 @@ latin_identifier = Word(initChars=alphas, bodyChars=alphanums + '_') | ||
| 557 | # --- HEX FUNCTION ----------------------------------------------------------- | 563 | # --- HEX FUNCTION ----------------------------------------------------------- |
| 558 | 564 | ||
| 559 | # match any custom function name with a hex string as argument: | 565 | # match any custom function name with a hex string as argument: |
| 566 | +# TODO: accept vba_expr_str_item as argument, check if it is a hex or base64 string at runtime | ||
| 560 | 567 | ||
| 561 | # quoted string of at least two hexadecimal numbers of two digits: | 568 | # quoted string of at least two hexadecimal numbers of two digits: |
| 562 | quoted_hex_string = Suppress('"') + Combine(Word(hexnums, exact=2) * (2, None)) + Suppress('"') | 569 | quoted_hex_string = Suppress('"') + Combine(Word(hexnums, exact=2) * (2, None)) + Suppress('"') |
| @@ -564,7 +571,21 @@ quoted_hex_string.setParseAction(lambda t: str(t[0])) | @@ -564,7 +571,21 @@ quoted_hex_string.setParseAction(lambda t: str(t[0])) | ||
| 564 | 571 | ||
| 565 | hex_function_call = Suppress(latin_identifier) + Suppress('(') + \ | 572 | hex_function_call = Suppress(latin_identifier) + Suppress('(') + \ |
| 566 | quoted_hex_string('hex_string') + Suppress(')') | 573 | quoted_hex_string('hex_string') + Suppress(')') |
| 567 | -hex_function_call.setParseAction(lambda t: binascii.a2b_hex(t.hex_string)) | 574 | +hex_function_call.setParseAction(lambda t: VbaExpressionString(binascii.a2b_hex(t.hex_string))) |
| 575 | + | ||
| 576 | + | ||
| 577 | +# --- BASE64 FUNCTION ----------------------------------------------------------- | ||
| 578 | + | ||
| 579 | +# match any custom function name with a Base64 string as argument: | ||
| 580 | +# TODO: accept vba_expr_str_item as argument, check if it is a hex or base64 string at runtime | ||
| 581 | + | ||
| 582 | +# quoted Base64 string: one or more groups of four Base64 characters, optionally followed by a padded final group: | ||
| 583 | +quoted_base64_string = Suppress('"') + Regex(BASE64_RE) + Suppress('"') | ||
| 584 | +quoted_base64_string.setParseAction(lambda t: str(t[0])) | ||
| 585 | + | ||
| 586 | +base64_function_call = Suppress(latin_identifier) + Suppress('(') + \ | ||
| 587 | + quoted_base64_string('base64_string') + Suppress(')') | ||
| 588 | +base64_function_call.setParseAction(lambda t: VbaExpressionString(binascii.a2b_base64(t.base64_string))) | ||
| 568 | 589 | ||
| 569 | 590 | ||
| 570 | # ---STRING EXPRESSION ------------------------------------------------------- | 591 | # ---STRING EXPRESSION ------------------------------------------------------- |
| @@ -579,7 +600,7 @@ def concat_strings_list(tokens): | @@ -579,7 +600,7 @@ def concat_strings_list(tokens): | ||
| 579 | return VbaExpressionString(''.join(strings)) | 600 | return VbaExpressionString(''.join(strings)) |
| 580 | 601 | ||
| 581 | 602 | ||
| 582 | -vba_expr_str_item = (vba_chr | strReverse | environ | quoted_string | hex_function_call) | 603 | +vba_expr_str_item = (vba_chr | strReverse | environ | quoted_string | hex_function_call | base64_function_call) |
| 583 | 604 | ||
| 584 | vba_expr_str <<= infixNotation(vba_expr_str_item, | 605 | vba_expr_str <<= infixNotation(vba_expr_str_item, |
| 585 | [ | 606 | [ |