Commit bf3fd0aca14a1b3a2986a41cc8f00bcdee492bce
1 parent
e6c4676f
olevba: display decoded strings which are printable by default, fixed VBA_Scanner.scan to return raw strings instead of repr(strings)
Showing 2 changed files with 39 additions and 11 deletions
oletools/doc/olevba.md
| ... | ... | @@ -34,7 +34,7 @@ by John William Davison, with significant modifications. |
| 34 | 34 | - Detect suspicious VBA keywords often used by malware |
| 35 | 35 | - Detect anti-sandboxing and anti-virtualization techniques |
| 36 | 36 | - Detect and decodes strings obfuscated with Hex/Base64/StrReverse/Dridex |
| 37 | -- Deobfuscates VBA expressions with any combination of Chr, Asc, Val, StrReverse, Environ, +, \&, using a VBA parser built with | |
| 37 | +- Deobfuscates VBA expressions with any combination of Chr, Asc, Val, StrReverse, Environ, +, &, using a VBA parser built with | |
| 38 | 38 | [pyparsing](http://pyparsing.wikispaces.com) |
| 39 | 39 | - Extract IOCs/patterns of interest such as IP addresses, URLs, e-mail addresses and executable file names |
| 40 | 40 | - Scan multiple files and sample collections (wildcards, recursive) | ... | ... |
oletools/olevba.py
| ... | ... | @@ -140,8 +140,10 @@ https://github.com/unixfreak0037/officeparser |
| 140 | 140 | # Davy Douhine (issue #9), issue #13 |
| 141 | 141 | # 2015-06-16 v0.31 PL: - added generic VBA expression deobfuscation (chr,asc,etc) |
| 142 | 142 | # 2015-06-19 PL: - added options -a, -c, --each, --attr |
| 143 | +# 2015-06-21 v0.32 PL: - always display decoded strings which are printable | |
| 144 | +# - fix VBA_Scanner.scan to return raw strings, not repr() | |
| 143 | 145 | |
| 144 | -__version__ = '0.31' | |
| 146 | +__version__ = '0.32' | |
| 145 | 147 | |
| 146 | 148 | #------------------------------------------------------------------------------ |
| 147 | 149 | # TODO: |
| ... | ... | @@ -189,6 +191,7 @@ import base64 |
| 189 | 191 | import traceback |
| 190 | 192 | import zlib |
| 191 | 193 | import email # for MHTML parsing |
| 194 | +import string # for printable | |
| 192 | 195 | |
| 193 | 196 | # import lxml or ElementTree for XML parsing: |
| 194 | 197 | try: |
| ... | ... | @@ -650,6 +653,22 @@ def mso_file_extract(data): |
| 650 | 653 | |
| 651 | 654 | #--- FUNCTIONS ---------------------------------------------------------------- |
| 652 | 655 | |
| 656 | +# set of printable characters, for is_printable | |
| 657 | +_PRINTABLE_SET = set(string.printable) | |
| 658 | + | |
| 659 | +def is_printable(s): | |
| 660 | + """ | |
| 661 | + returns True if string s only contains printable ASCII characters | |
| 662 | + (i.e. contained in string.printable) | |
| 663 | + This is similar to Python 3's str.isprintable, for Python 2.x. | |
| 664 | + :param s: str | |
| 665 | + :return: bool | |
| 666 | + """ | |
| 667 | + # inspired from http://stackoverflow.com/questions/3636928/test-if-a-python-string-is-printable | |
| 668 | + # check if the set of chars from s is contained into the set of printable chars: | |
| 669 | + return set(s).issubset(_PRINTABLE_SET) | |
| 670 | + | |
| 671 | + | |
| 653 | 672 | def copytoken_help(decompressed_current, decompressed_chunk_start): |
| 654 | 673 | """ |
| 655 | 674 | compute bit masks to decode a CopyToken according to MS-OVBA 2.4.1.3.19.1 CopyToken Help |
| ... | ... | @@ -1481,15 +1500,19 @@ class VBA_Scanner(object): |
| 1481 | 1500 | results.append(('Suspicious', keyword, description)) |
| 1482 | 1501 | for pattern_type, value in self.iocs: |
| 1483 | 1502 | results.append(('IOC', value, pattern_type)) |
| 1484 | - if include_decoded_strings: | |
| 1485 | - for encoded, decoded in self.hex_strings: | |
| 1486 | - results.append(('Hex String', repr(decoded), repr(encoded))) | |
| 1487 | - for encoded, decoded in self.base64_strings: | |
| 1488 | - results.append(('Base64 String', repr(decoded), repr(encoded))) | |
| 1489 | - for encoded, decoded in self.dridex_strings: | |
| 1490 | - results.append(('Dridex string', repr(decoded), repr(encoded))) | |
| 1491 | - for encoded, decoded in self.vba_strings: | |
| 1492 | - results.append(('VBA string', repr(decoded), repr(encoded))) | |
| 1503 | + # include decoded strings only if they are printable or if --decode option: | |
| 1504 | + for encoded, decoded in self.hex_strings: | |
| 1505 | + if include_decoded_strings or is_printable(decoded): | |
| 1506 | + results.append(('Hex String', decoded, encoded)) | |
| 1507 | + for encoded, decoded in self.base64_strings: | |
| 1508 | + if include_decoded_strings or is_printable(decoded): | |
| 1509 | + results.append(('Base64 String', decoded, encoded)) | |
| 1510 | + for encoded, decoded in self.dridex_strings: | |
| 1511 | + if include_decoded_strings or is_printable(decoded): | |
| 1512 | + results.append(('Dridex string', decoded, encoded)) | |
| 1513 | + for encoded, decoded in self.vba_strings: | |
| 1514 | + if include_decoded_strings or is_printable(decoded): | |
| 1515 | + results.append(('VBA string', decoded, encoded)) | |
| 1493 | 1516 | return results |
| 1494 | 1517 | |
| 1495 | 1518 | def scan_summary(self): |
| ... | ... | @@ -1854,6 +1877,11 @@ def print_analysis(vba_code, show_decoded_strings=False): |
| 1854 | 1877 | t.max_width['Keyword'] = 20 |
| 1855 | 1878 | t.max_width['Description'] = 39 |
| 1856 | 1879 | for kw_type, keyword, description in results: |
| 1880 | + # handle non printable strings: | |
| 1881 | + if not is_printable(keyword): | |
| 1882 | + keyword = repr(keyword) | |
| 1883 | + if not is_printable(description): | |
| 1884 | + description = repr(description) | |
| 1857 | 1885 | t.add_row((kw_type, keyword, description)) |
| 1858 | 1886 | print t |
| 1859 | 1887 | else: | ... | ... |