Commit bf3fd0aca14a1b3a2986a41cc8f00bcdee492bce

Authored by Philippe Lagadec
1 parent e6c4676f

olevba: display decoded strings which are printable by default, fixed VBA_Scanner.scan to return raw strings instead of repr(strings)
oletools/doc/olevba.md
... ... @@ -34,7 +34,7 @@ by John William Davison, with significant modifications.
34 34 - Detect suspicious VBA keywords often used by malware
35 35 - Detect anti-sandboxing and anti-virtualization techniques
36 36 - Detect and decode strings obfuscated with Hex/Base64/StrReverse/Dridex
37   -- Deobfuscates VBA expressions with any combination of Chr, Asc, Val, StrReverse, Environ, +, \&, using a VBA parser built with
  37 +- Deobfuscates VBA expressions with any combination of Chr, Asc, Val, StrReverse, Environ, +, &, using a VBA parser built with
38 38 [pyparsing](http://pyparsing.wikispaces.com)
39 39 - Extract IOCs/patterns of interest such as IP addresses, URLs, e-mail addresses and executable file names
40 40 - Scan multiple files and sample collections (wildcards, recursive)
... ...
oletools/olevba.py
... ... @@ -140,8 +140,10 @@ https://github.com/unixfreak0037/officeparser
140 140 # Davy Douhine (issue #9), issue #13
141 141 # 2015-06-16 v0.31 PL: - added generic VBA expression deobfuscation (chr,asc,etc)
142 142 # 2015-06-19 PL: - added options -a, -c, --each, --attr
  143 +# 2015-06-21 v0.32 PL: - always display decoded strings which are printable
  144 +# - fix VBA_Scanner.scan to return raw strings, not repr()
143 145  
144   -__version__ = '0.31'
  146 +__version__ = '0.32'
145 147  
146 148 #------------------------------------------------------------------------------
147 149 # TODO:
... ... @@ -189,6 +191,7 @@ import base64
189 191 import traceback
190 192 import zlib
191 193 import email # for MHTML parsing
  194 +import string # for printable
192 195  
193 196 # import lxml or ElementTree for XML parsing:
194 197 try:
... ... @@ -650,6 +653,22 @@ def mso_file_extract(data):
650 653  
651 654 #--- FUNCTIONS ----------------------------------------------------------------
652 655  
  656 +# set of printable characters, for is_printable
  657 +_PRINTABLE_SET = set(string.printable)
  658 +
  659 +def is_printable(s):
  660 + """
  661 + returns True if string s only contains printable ASCII characters
  662 + (i.e. contained in string.printable)
  663 + Roughly comparable to Python 3's str.isprintable for Python 2.x, but ASCII-only, and whitespace such as tab or newline counts as printable.
  664 + :param s: str
  665 + :return: bool
  666 + """
  667 + # inspired from http://stackoverflow.com/questions/3636928/test-if-a-python-string-is-printable
  668 + # check that the set of chars from s is a subset of the set of printable chars:
  669 + return set(s).issubset(_PRINTABLE_SET)
  670 +
  671 +
653 672 def copytoken_help(decompressed_current, decompressed_chunk_start):
654 673 """
655 674 compute bit masks to decode a CopyToken according to MS-OVBA 2.4.1.3.19.1 CopyToken Help
... ... @@ -1481,15 +1500,19 @@ class VBA_Scanner(object):
1481 1500 results.append(('Suspicious', keyword, description))
1482 1501 for pattern_type, value in self.iocs:
1483 1502 results.append(('IOC', value, pattern_type))
1484   - if include_decoded_strings:
1485   - for encoded, decoded in self.hex_strings:
1486   - results.append(('Hex String', repr(decoded), repr(encoded)))
1487   - for encoded, decoded in self.base64_strings:
1488   - results.append(('Base64 String', repr(decoded), repr(encoded)))
1489   - for encoded, decoded in self.dridex_strings:
1490   - results.append(('Dridex string', repr(decoded), repr(encoded)))
1491   - for encoded, decoded in self.vba_strings:
1492   - results.append(('VBA string', repr(decoded), repr(encoded)))
  1503 + # include decoded strings only if they are printable, or if the --decode option is set:
  1504 + for encoded, decoded in self.hex_strings:
  1505 + if include_decoded_strings or is_printable(decoded):
  1506 + results.append(('Hex String', decoded, encoded))
  1507 + for encoded, decoded in self.base64_strings:
  1508 + if include_decoded_strings or is_printable(decoded):
  1509 + results.append(('Base64 String', decoded, encoded))
  1510 + for encoded, decoded in self.dridex_strings:
  1511 + if include_decoded_strings or is_printable(decoded):
  1512 + results.append(('Dridex string', decoded, encoded))
  1513 + for encoded, decoded in self.vba_strings:
  1514 + if include_decoded_strings or is_printable(decoded):
  1515 + results.append(('VBA string', decoded, encoded))
1493 1516 return results
1494 1517  
1495 1518 def scan_summary(self):
... ... @@ -1854,6 +1877,11 @@ def print_analysis(vba_code, show_decoded_strings=False):
1854 1877 t.max_width['Keyword'] = 20
1855 1878 t.max_width['Description'] = 39
1856 1879 for kw_type, keyword, description in results:
  1880 + # handle non printable strings:
  1881 + if not is_printable(keyword):
  1882 + keyword = repr(keyword)
  1883 + if not is_printable(description):
  1884 + description = repr(description)
1857 1885 t.add_row((kw_type, keyword, description))
1858 1886 print t
1859 1887 else:
... ...