Commit bcbb60864a988956abcd2666ddb7740d45cf0cde

Authored by Philippe Lagadec
1 parent 5d3718da

olevba: added option --hex to show all hex strings decoded

Showing 2 changed files with 23 additions and 13 deletions
oletools/olevba.py
... ... @@ -108,8 +108,9 @@ https://github.com/unixfreak0037/officeparser
108 108 # 2015-01-23 v0.18 PL: - fixed issue #3, case-insensitive search in code_modules
109 109 # 2015-01-24 v0.19 PL: - improved the detection of IOCs obfuscated with hex
110 110 # strings and StrReverse
  111 +# 2015-01-26 v0.20 PL: - added option --hex to show all hex strings decoded
111 112  
112   -__version__ = '0.19'
  113 +__version__ = '0.20'
113 114  
114 115 #------------------------------------------------------------------------------
115 116 # TODO:
... ... @@ -893,13 +894,14 @@ def detect_hex_strings(vba_code):
893 894 return results
894 895  
895 896  
896   -def scan_vba(vba_code):
  897 +def scan_vba(vba_code, include_hex_strings=False):
897 898 """
898 899 Analyze the provided VBA code to detect suspicious keywords,
899 900 auto-executable macros, IOC patterns, obfuscation patterns
900 901 such as hex-encoded strings.
901 902  
902 903 :param vba_code: str, VBA source code to be analyzed
  904 + :param include_hex_strings: bool, if True hex-encoded strings will be included with their decoded content.
903 905 :return: list of tuples (type, keyword, description)
904 906 (type = 'AutoExec', 'Suspicious', 'IOC' or 'Hex String')
905 907 """
... ... @@ -918,11 +920,14 @@ def scan_vba(vba_code):
918 920 # StrReverse before hex decoding:
919 921 vba_code += '\n'+binascii.unhexlify(encoded[::-1])
920 922 #example: https://malwr.com/analysis/NmFlMGI4YTY1YzYyNDkwNTg1ZTBiZmY5OGI3YjlhYzU/
  923 + #TODO: also append the full code reversed when StrReverse is used? (risk of false positives?)
  924 + #TODO: show which IOCs have been found using hex, strrev or both
921 925 autoexec_keywords = detect_autoexec(vba_code)
922 926 suspicious_keywords = detect_suspicious(vba_code)
923 927 # If hex-encoded strings were discovered, add an item to suspicious keywords:
924 928 if hex_strings:
925   - suspicious_keywords.append(('Hex Strings', 'Hex-encoded strings were detected, may be used to obfuscate strings (option --hex to see all)'))
  929 + suspicious_keywords.append(('Hex Strings',
  930 + 'Hex-encoded strings were detected, may be used to obfuscate strings (option --hex to see all)'))
926 931 patterns = detect_patterns(vba_code)
927 932 results = []
928 933 for keyword, description in autoexec_keywords:
... ... @@ -931,9 +936,9 @@ def scan_vba(vba_code):
931 936 results.append(('Suspicious', keyword, description))
932 937 for pattern_type, value in patterns:
933 938 results.append(('IOC', value, pattern_type))
934   - # Only if option --hex:
935   - # for encoded, decoded in hex_strings:
936   - # results.append(('Hex String', repr(decoded), encoded))
  939 + if include_hex_strings:
  940 + for encoded, decoded in hex_strings:
  941 + results.append(('Hex String', repr(decoded), encoded))
937 942 return results
938 943  
939 944  
... ... @@ -994,6 +999,7 @@ class VBA_Parser(object):
994 999 self.type = TYPE_OpenXML
995 1000 z = zipfile.ZipFile(_file)
996 1001 #TODO: check if this is actually an OpenXML file
  1002 + #TODO: if the zip file is encrypted, suggest using the -z option, or try '-z infected' automatically?
997 1003 # check each file within the zip if it is an OLE file, by reading its magic:
998 1004 for subfile in z.namelist():
999 1005 magic = z.open(subfile).read(len(olefile.MAGIC))
... ... @@ -1155,14 +1161,15 @@ class VBA_Parser(object):
1155 1161 self.ole_file.close()
1156 1162  
1157 1163  
1158   -def print_analysis(vba_code):
  1164 +def print_analysis(vba_code, show_hex_strings=False):
1159 1165 """
1160 1166 Analyze the provided VBA code, and print the results in a table
1161 1167  
1162 1168 :param vba_code: str, VBA source code to be analyzed
  1169 + :param show_hex_strings: bool, if True hex-encoded strings will be displayed with their decoded content.
1163 1170 :return: None
1164 1171 """
1165   - results = scan_vba(vba_code)
  1172 + results = scan_vba(vba_code, show_hex_strings)
1166 1173 if results:
1167 1174 t = prettytable.PrettyTable(('Type', 'Keyword', 'Description'))
1168 1175 t.align = 'l'
... ... @@ -1177,7 +1184,7 @@ def print_analysis(vba_code):
1177 1184  
1178 1185  
1179 1186  
1180   -def process_file (container, filename, data):
  1187 +def process_file (container, filename, data, show_hex_strings=False):
1181 1188 """
1182 1189 Process a single file
1183 1190  
... ... @@ -1185,6 +1192,7 @@ def process_file (container, filename, data):
1185 1192 a zip archive, None otherwise.
1186 1193 :param filename: str, path and filename of file on disk, or within the container.
1187 1194 :param data: bytes, content of the file if it is in a container, None if it is a file on disk.
  1195 + :param show_hex_strings: bool, if True hex-encoded strings will be displayed with their decoded content.
1188 1196 """
1189 1197 #TODO: replace print by writing to a provided output file (sys.stdout by default)
1190 1198 if container:
... ... @@ -1214,7 +1222,7 @@ def process_file (container, filename, data):
1214 1222 print vba_code
1215 1223 print '- '*39
1216 1224 print 'ANALYSIS:'
1217   - print_analysis(vba_code)
  1225 + print_analysis(vba_code, show_hex_strings)
1218 1226 else:
1219 1227 print 'No VBA macros found.'
1220 1228 except: #TypeError:
... ... @@ -1329,6 +1337,8 @@ def main():
1329 1337 help='detailed mode, display full results (default for single file)')
1330 1338 parser.add_option("-i", "--input", dest='input', type='str', default=None,
1331 1339 help='input file containing VBA source code to be analyzed (no parsing)')
  1340 + parser.add_option("--hex", action="store_true", dest="show_hex_strings",
  1341 + help='display all the hex-encoded strings with their decoded content.')
1332 1342  
1333 1343 (options, args) = parser.parse_args()
1334 1344  
... ... @@ -1364,7 +1374,7 @@ def main():
1364 1374 continue
1365 1375 if options.detailed_mode and not options.triage_mode:
1366 1376 # fully detailed output
1367   - process_file(container, filename, data)
  1377 + process_file(container, filename, data, show_hex_strings=options.show_hex_strings)
1368 1378 else:
1369 1379 # print container name when it changes:
1370 1380 if container != previous_container:
... ... @@ -1380,7 +1390,7 @@ def main():
1380 1390 if count == 1 and not options.triage_mode and not options.detailed_mode:
1381 1391 # if options -t and -d were not specified and it's a single file, print details:
1382 1392 #TODO: avoid doing the analysis twice by storing results
1383   - process_file(container, filename, data)
  1393 + process_file(container, filename, data, show_hex_strings=options.show_hex_strings)
1384 1394  
1385 1395 if __name__ == '__main__':
1386 1396 main()
... ...
setup.py
... ... @@ -33,7 +33,7 @@ import sys, os, fnmatch
33 33 #--- METADATA -----------------------------------------------------------------
34 34  
35 35 name = "oletools"
36   -version = '0.07'
  36 +version = '0.08a'
37 37 desc = "Python tools to analyze security characteristics of MS OLE2 files (also called Structured Storage, Compound File Binary Format or Compound Document File Format), such as Microsoft Office documents, for Malware Analysis and Incident Response."
38 38 long_desc = open('oletools/README.rst').read()
39 39 author ="Philippe Lagadec"
... ...