Commit 4991f1ae8f2a6a42998ab3e5fd691bbbb1c08db9
1 parent
41d98ad5
olevba: added Base64 function decoding to VBA Parser
Showing
1 changed file
with
25 additions
and
4 deletions
oletools/olevba.py
| ... | ... | @@ -144,11 +144,13 @@ https://github.com/unixfreak0037/officeparser |
| 144 | 144 | # - fix VBA_Scanner.scan to return raw strings, not repr() |
| 145 | 145 | # 2015-07-09 v0.33 PL: - removed usage of sys.stderr which causes issues |
| 146 | 146 | # 2015-07-12 PL: - added Hex function decoding to VBA Parser |
| 147 | +# 2015-07-13 PL: - added Base64 function decoding to VBA Parser | |
| 147 | 148 | |
| 148 | 149 | __version__ = '0.33' |
| 149 | 150 | |
| 150 | 151 | #------------------------------------------------------------------------------ |
| 151 | 152 | # TODO: |
| 153 | +# + dedup deobfuscation results | |
| 152 | 154 | # + option --fast to disable VBA expressions parsing |
| 153 | 155 | # + do not use logging, but a provided logger (null logger by default) |
| 154 | 156 | # + setup logging (common with other oletools) |
| ... | ... | @@ -419,8 +421,9 @@ re_hex_string = re.compile(r'(?:[0-9A-Fa-f]{2}){4,}') |
| 419 | 421 | # regex to detect strings encoded in base64 |
| 420 | 422 | #re_base64_string = re.compile(r'"(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)?"') |
| 421 | 423 | # better version from balbuzard, less false positives: |
| 422 | -re_base64_string = re.compile( | |
| 423 | - r'"(?:[A-Za-z0-9+/]{4}){1,}(?:[A-Za-z0-9+/]{2}[AEIMQUYcgkosw048]=|[A-Za-z0-9+/][AQgw]==)?"') | |
| 424 | +# (plain version without the enclosing double quotes, also used below in quoted_base64_string) | |
| 425 | +BASE64_RE = r'(?:[A-Za-z0-9+/]{4}){1,}(?:[A-Za-z0-9+/]{2}[AEIMQUYcgkosw048]=|[A-Za-z0-9+/][AQgw]==)?' | |
| 426 | +re_base64_string = re.compile('"' + BASE64_RE + '"') | |
| 424 | 427 | # white list of common strings matching the base64 regex, but which are not base64 strings (all lowercase): |
| 425 | 428 | BASE64_WHITELIST = set(['thisdocument', 'thisworkbook', 'test', 'temp', 'http', 'open', 'exit']) |
| 426 | 429 | |
| ... | ... | @@ -438,6 +441,9 @@ re_nothex_check = re.compile(r'[G-Zg-z]') |
| 438 | 441 | # https://msdn.microsoft.com/en-us/library/dd361851.aspx |
| 439 | 442 | # - pyparsing: http://pyparsing.wikispaces.com/ |
| 440 | 443 | |
| 444 | +# TODO: set whitespaces according to VBA | |
| 445 | +# TODO: merge extended lines before parsing | |
| 446 | + | |
| 441 | 447 | # VBA identifier chars (from MS-VBAL 3.3.5) |
| 442 | 448 | vba_identifier_chars = alphanums + '_' |
| 443 | 449 | |
| ... | ... | @@ -557,6 +563,7 @@ latin_identifier = Word(initChars=alphas, bodyChars=alphanums + '_') |
| 557 | 563 | # --- HEX FUNCTION ----------------------------------------------------------- |
| 558 | 564 | |
| 559 | 565 | # match any custom function name with a hex string as argument: |
| 566 | +# TODO: accept vba_expr_str_item as argument, check if it is a hex or base64 string at runtime | |
| 560 | 567 | |
| 561 | 568 | # quoted string of at least two hexadecimal numbers of two digits: |
| 562 | 569 | quoted_hex_string = Suppress('"') + Combine(Word(hexnums, exact=2) * (2, None)) + Suppress('"') |
| ... | ... | @@ -564,7 +571,21 @@ quoted_hex_string.setParseAction(lambda t: str(t[0])) |
| 564 | 571 | |
| 565 | 572 | hex_function_call = Suppress(latin_identifier) + Suppress('(') + \ |
| 566 | 573 | quoted_hex_string('hex_string') + Suppress(')') |
| 567 | -hex_function_call.setParseAction(lambda t: binascii.a2b_hex(t.hex_string)) | |
| 574 | +hex_function_call.setParseAction(lambda t: VbaExpressionString(binascii.a2b_hex(t.hex_string))) | |
| 575 | + | |
| 576 | + | |
| 577 | +# --- BASE64 FUNCTION ----------------------------------------------------------- | |
| 578 | + | |
| 579 | +# match any custom function name with a Base64 string as argument: | |
| 580 | +# TODO: accept vba_expr_str_item as argument, check if it is a hex or base64 string at runtime | |
| 581 | + | |
| 582 | +# quoted Base64 string: one or more groups of four Base64 characters, optionally followed by a padded final group: | |
| 583 | +quoted_base64_string = Suppress('"') + Regex(BASE64_RE) + Suppress('"') | |
| 584 | +quoted_base64_string.setParseAction(lambda t: str(t[0])) | |
| 585 | + | |
| 586 | +base64_function_call = Suppress(latin_identifier) + Suppress('(') + \ | |
| 587 | + quoted_base64_string('base64_string') + Suppress(')') | |
| 588 | +base64_function_call.setParseAction(lambda t: VbaExpressionString(binascii.a2b_base64(t.base64_string))) | |
| 568 | 589 | |
| 569 | 590 | |
| 570 | 591 | # ---STRING EXPRESSION ------------------------------------------------------- |
| ... | ... | @@ -579,7 +600,7 @@ def concat_strings_list(tokens): |
| 579 | 600 | return VbaExpressionString(''.join(strings)) |
| 580 | 601 | |
| 581 | 602 | |
| 582 | -vba_expr_str_item = (vba_chr | strReverse | environ | quoted_string | hex_function_call) | |
| 603 | +vba_expr_str_item = (vba_chr | strReverse | environ | quoted_string | hex_function_call | base64_function_call) | |
| 583 | 604 | |
| 584 | 605 | vba_expr_str <<= infixNotation(vba_expr_str_item, |
| 585 | 606 | [ | ... | ... |