Commit f6c210a13555da089f962540a8ff5a6724fe884d

Authored by Matteo Lodi
1 parent eb94f64e

Added extraction of XLM macros from the new XLSM format

Showing 1 changed file with 26 additions and 3 deletions
oletools/olevba.py
@@ -837,6 +837,9 @@ URL_PATH = r'(?:/[a-zA-Z0-9\-\._\?\,\'/\\\+&%\$#\=~]*)?' # [^\.\,\)\(\s"] @@ -837,6 +837,9 @@ URL_PATH = r'(?:/[a-zA-Z0-9\-\._\?\,\'/\\\+&%\$#\=~]*)?' # [^\.\,\)\(\s"]
837 URL_RE = SCHEME + r'\://' + SERVER_PORT + URL_PATH 837 URL_RE = SCHEME + r'\://' + SERVER_PORT + URL_PATH
838 re_url = re.compile(URL_RE) 838 re_url = re.compile(URL_RE)
839 839
  840 +EXCLUDE_URLS_PATTERNS = ["http://schemas.openxmlformats.org/",
  841 + "http://schemas.microsoft.com/",
  842 + ]
840 843
841 # Patterns to be extracted (IP addresses, URLs, etc) 844 # Patterns to be extracted (IP addresses, URLs, etc)
842 # From patterns.py in balbuzard 845 # From patterns.py in balbuzard
@@ -2203,7 +2206,11 @@ def detect_patterns(vba_code, obfuscation=None): @@ -2203,7 +2206,11 @@ def detect_patterns(vba_code, obfuscation=None):
2203 for pattern_type, pattern_re in RE_PATTERNS: 2206 for pattern_type, pattern_re in RE_PATTERNS:
2204 for match in pattern_re.finditer(vba_code): 2207 for match in pattern_re.finditer(vba_code):
2205 value = match.group() 2208 value = match.group()
2206 - if value not in found: 2209 + exclude_pattern_found = False
  2210 + for url_exclude_pattern in EXCLUDE_URLS_PATTERNS:
  2211 + if value.startswith(url_exclude_pattern):
  2212 + exclude_pattern_found = True
  2213 + if value not in found and not exclude_pattern_found:
2207 results.append((pattern_type + obf_text, value)) 2214 results.append((pattern_type + obf_text, value))
2208 found.add(value) 2215 found.add(value)
2209 return results 2216 return results
@@ -2771,7 +2778,6 @@ class VBA_Parser(object): @@ -2771,7 +2778,6 @@ class VBA_Parser(object):
2771 log.info('Failed OLE parsing for file %r (%s)' % (self.filename, exc)) 2778 log.info('Failed OLE parsing for file %r (%s)' % (self.filename, exc))
2772 log.debug('Trace:', exc_info=True) 2779 log.debug('Trace:', exc_info=True)
2773 2780
2774 -  
2775 def open_openxml(self, _file): 2781 def open_openxml(self, _file):
2776 """ 2782 """
2777 Open an OpenXML file 2783 Open an OpenXML file
@@ -2789,9 +2795,23 @@ class VBA_Parser(object): @@ -2789,9 +2795,23 @@ class VBA_Parser(object):
2789 #TODO: if the zip file is encrypted, suggest to use the -z option, or try '-z infected' automatically 2795 #TODO: if the zip file is encrypted, suggest to use the -z option, or try '-z infected' automatically
2790 # check each file within the zip if it is an OLE file, by reading its magic: 2796 # check each file within the zip if it is an OLE file, by reading its magic:
2791 for subfile in z.namelist(): 2797 for subfile in z.namelist():
  2798 + log.debug("subfile {}".format(subfile))
2792 with z.open(subfile) as file_handle: 2799 with z.open(subfile) as file_handle:
  2800 + found_ole = False
  2801 + xml_macrosheet_found = False
2793 magic = file_handle.read(len(olefile.MAGIC)) 2802 magic = file_handle.read(len(olefile.MAGIC))
2794 - if magic == olefile.MAGIC: 2803 + if magic == olefile.MAGIC:
  2804 + found_ole = True
  2805 + # in case we did not find an OLE file, there could be a XLM macrosheet
  2806 + if not found_ole:
  2807 + macro_sheet_footer = b"</xm:macrosheet>"
  2808 + len_macro_sheet_footer = len(macro_sheet_footer)
  2809 + read_all_file = file_handle.read()
  2810 + last_bytes_to_check = read_all_file[-len_macro_sheet_footer:]
  2811 + if last_bytes_to_check == macro_sheet_footer:
  2812 + log.info("Found XLM Macro in subfile: {}".format(subfile))
  2813 + xml_macrosheet_found = True
  2814 + if found_ole or xml_macrosheet_found:
2795 log.debug('Opening OLE file %s within zip' % subfile) 2815 log.debug('Opening OLE file %s within zip' % subfile)
2796 with z.open(subfile) as file_handle: 2816 with z.open(subfile) as file_handle:
2797 ole_data = file_handle.read() 2817 ole_data = file_handle.read()
@@ -3181,6 +3201,7 @@ class VBA_Parser(object): @@ -3181,6 +3201,7 @@ class VBA_Parser(object):
3181 3201
3182 :return: bool, True if at least one VBA project has been found, False otherwise 3202 :return: bool, True if at least one VBA project has been found, False otherwise
3183 """ 3203 """
  3204 + log.debug("detect vba macros")
3184 #TODO: return None or raise exception if format not supported 3205 #TODO: return None or raise exception if format not supported
3185 #TODO: return the number of VBA projects found instead of True/False? 3206 #TODO: return the number of VBA projects found instead of True/False?
3186 # if this method was already called, return the previous result: 3207 # if this method was already called, return the previous result:
@@ -3189,6 +3210,7 @@ class VBA_Parser(object): @@ -3189,6 +3210,7 @@ class VBA_Parser(object):
3189 # if OpenXML/PPT, check all the OLE subfiles: 3210 # if OpenXML/PPT, check all the OLE subfiles:
3190 if self.ole_file is None: 3211 if self.ole_file is None:
3191 for ole_subfile in self.ole_subfiles: 3212 for ole_subfile in self.ole_subfiles:
  3213 + log.debug("ole subfile {}".format(ole_subfile))
3192 ole_subfile.no_xlm = self.no_xlm 3214 ole_subfile.no_xlm = self.no_xlm
3193 if ole_subfile.detect_vba_macros(): 3215 if ole_subfile.detect_vba_macros():
3194 self.contains_macros = True 3216 self.contains_macros = True
@@ -3237,6 +3259,7 @@ class VBA_Parser(object): @@ -3237,6 +3259,7 @@ class VBA_Parser(object):
3237 return self.contains_macros 3259 return self.contains_macros
3238 3260
3239 def detect_xlm_macros(self): 3261 def detect_xlm_macros(self):
  3262 + log.debug("detect xlm macros")
3240 # if this is a SLK file, the analysis was done in open_slk: 3263 # if this is a SLK file, the analysis was done in open_slk:
3241 if self.type == TYPE_SLK: 3264 if self.type == TYPE_SLK:
3242 return self.contains_macros 3265 return self.contains_macros