Commit bf3fd0aca14a1b3a2986a41cc8f00bcdee492bce

Authored by Philippe Lagadec
1 parent e6c4676f

olevba: display decoded strings which are printable by default, fixed VBA_Scanner.scan to return raw strings instead of repr(strings)
oletools/doc/olevba.md
@@ -34,7 +34,7 @@ by John William Davison, with significant modifications. @@ -34,7 +34,7 @@ by John William Davison, with significant modifications.
34 - Detect suspicious VBA keywords often used by malware 34 - Detect suspicious VBA keywords often used by malware
35 - Detect anti-sandboxing and anti-virtualization techniques 35 - Detect anti-sandboxing and anti-virtualization techniques
36 - Detect and decodes strings obfuscated with Hex/Base64/StrReverse/Dridex 36 - Detect and decodes strings obfuscated with Hex/Base64/StrReverse/Dridex
37 -- Deobfuscates VBA expressions with any combination of Chr, Asc, Val, StrReverse, Environ, +, \&, using a VBA parser built with 37 +- Deobfuscates VBA expressions with any combination of Chr, Asc, Val, StrReverse, Environ, +, &, using a VBA parser built with
38 [pyparsing](http://pyparsing.wikispaces.com) 38 [pyparsing](http://pyparsing.wikispaces.com)
39 - Extract IOCs/patterns of interest such as IP addresses, URLs, e-mail addresses and executable file names 39 - Extract IOCs/patterns of interest such as IP addresses, URLs, e-mail addresses and executable file names
40 - Scan multiple files and sample collections (wildcards, recursive) 40 - Scan multiple files and sample collections (wildcards, recursive)
oletools/olevba.py
@@ -140,8 +140,10 @@ https://github.com/unixfreak0037/officeparser @@ -140,8 +140,10 @@ https://github.com/unixfreak0037/officeparser
140 # Davy Douhine (issue #9), issue #13 140 # Davy Douhine (issue #9), issue #13
141 # 2015-06-16 v0.31 PL: - added generic VBA expression deobfuscation (chr,asc,etc) 141 # 2015-06-16 v0.31 PL: - added generic VBA expression deobfuscation (chr,asc,etc)
142 # 2015-06-19 PL: - added options -a, -c, --each, --attr 142 # 2015-06-19 PL: - added options -a, -c, --each, --attr
  143 +# 2015-06-21 v0.32 PL: - always display decoded strings which are printable
  144 +# - fix VBA_Scanner.scan to return raw strings, not repr()
143 145
144 -__version__ = '0.31' 146 +__version__ = '0.32'
145 147
146 #------------------------------------------------------------------------------ 148 #------------------------------------------------------------------------------
147 # TODO: 149 # TODO:
@@ -189,6 +191,7 @@ import base64 @@ -189,6 +191,7 @@ import base64
189 import traceback 191 import traceback
190 import zlib 192 import zlib
191 import email # for MHTML parsing 193 import email # for MHTML parsing
  194 +import string # for printable
192 195
193 # import lxml or ElementTree for XML parsing: 196 # import lxml or ElementTree for XML parsing:
194 try: 197 try:
@@ -650,6 +653,22 @@ def mso_file_extract(data): @@ -650,6 +653,22 @@ def mso_file_extract(data):
650 653
651 #--- FUNCTIONS ---------------------------------------------------------------- 654 #--- FUNCTIONS ----------------------------------------------------------------
652 655
  656 +# set of printable characters, for is_printable
  657 +_PRINTABLE_SET = set(string.printable)
  658 +
  659 +def is_printable(s):
  660 + """
  661 + returns True if string s only contains printable ASCII characters
  662 + (i.e. contained in string.printable)
  663 + This is similar to Python 3's str.isprintable, for Python 2.x.
  664 + :param s: str
  665 + :return: bool
  666 + """
  667 + # inspired from http://stackoverflow.com/questions/3636928/test-if-a-python-string-is-printable
  668 + # check if the set of chars from s is contained into the set of printable chars:
  669 + return set(s).issubset(_PRINTABLE_SET)
  670 +
  671 +
653 def copytoken_help(decompressed_current, decompressed_chunk_start): 672 def copytoken_help(decompressed_current, decompressed_chunk_start):
654 """ 673 """
655 compute bit masks to decode a CopyToken according to MS-OVBA 2.4.1.3.19.1 CopyToken Help 674 compute bit masks to decode a CopyToken according to MS-OVBA 2.4.1.3.19.1 CopyToken Help
@@ -1481,15 +1500,19 @@ class VBA_Scanner(object): @@ -1481,15 +1500,19 @@ class VBA_Scanner(object):
1481 results.append(('Suspicious', keyword, description)) 1500 results.append(('Suspicious', keyword, description))
1482 for pattern_type, value in self.iocs: 1501 for pattern_type, value in self.iocs:
1483 results.append(('IOC', value, pattern_type)) 1502 results.append(('IOC', value, pattern_type))
1484 - if include_decoded_strings:  
1485 - for encoded, decoded in self.hex_strings:  
1486 - results.append(('Hex String', repr(decoded), repr(encoded)))  
1487 - for encoded, decoded in self.base64_strings:  
1488 - results.append(('Base64 String', repr(decoded), repr(encoded)))  
1489 - for encoded, decoded in self.dridex_strings:  
1490 - results.append(('Dridex string', repr(decoded), repr(encoded)))  
1491 - for encoded, decoded in self.vba_strings:  
1492 - results.append(('VBA string', repr(decoded), repr(encoded))) 1503 + # include decoded strings only if they are printable or if --decode option:
  1504 + for encoded, decoded in self.hex_strings:
  1505 + if include_decoded_strings or is_printable(decoded):
  1506 + results.append(('Hex String', decoded, encoded))
  1507 + for encoded, decoded in self.base64_strings:
  1508 + if include_decoded_strings or is_printable(decoded):
  1509 + results.append(('Base64 String', decoded, encoded))
  1510 + for encoded, decoded in self.dridex_strings:
  1511 + if include_decoded_strings or is_printable(decoded):
  1512 + results.append(('Dridex string', decoded, encoded))
  1513 + for encoded, decoded in self.vba_strings:
  1514 + if include_decoded_strings or is_printable(decoded):
  1515 + results.append(('VBA string', decoded, encoded))
1493 return results 1516 return results
1494 1517
1495 def scan_summary(self): 1518 def scan_summary(self):
@@ -1854,6 +1877,11 @@ def print_analysis(vba_code, show_decoded_strings=False): @@ -1854,6 +1877,11 @@ def print_analysis(vba_code, show_decoded_strings=False):
1854 t.max_width['Keyword'] = 20 1877 t.max_width['Keyword'] = 20
1855 t.max_width['Description'] = 39 1878 t.max_width['Description'] = 39
1856 for kw_type, keyword, description in results: 1879 for kw_type, keyword, description in results:
  1880 + # handle non printable strings:
  1881 + if not is_printable(keyword):
  1882 + keyword = repr(keyword)
  1883 + if not is_printable(description):
  1884 + description = repr(description)
1857 t.add_row((kw_type, keyword, description)) 1885 t.add_row((kw_type, keyword, description))
1858 print t 1886 print t
1859 else: 1887 else: