Commit f6c210a13555da089f962540a8ff5a6724fe884d

Authored by Matteo Lodi
1 parent eb94f64e

Added extraction of XLM macros from the new XLSM format

Showing 1 changed file with 26 additions and 3 deletions
oletools/olevba.py
... ... @@ -837,6 +837,9 @@ URL_PATH = r'(?:/[a-zA-Z0-9\-\._\?\,\'/\\\+&%\$#\=~]*)?' # [^\.\,\)\(\s"]
837 837 URL_RE = SCHEME + r'\://' + SERVER_PORT + URL_PATH
838 838 re_url = re.compile(URL_RE)
839 839  
  840 +EXCLUDE_URLS_PATTERNS = ["http://schemas.openxmlformats.org/",
  841 + "http://schemas.microsoft.com/",
  842 + ]
840 843  
841 844 # Patterns to be extracted (IP addresses, URLs, etc)
842 845 # From patterns.py in balbuzard
... ... @@ -2203,7 +2206,11 @@ def detect_patterns(vba_code, obfuscation=None):
2203 2206 for pattern_type, pattern_re in RE_PATTERNS:
2204 2207 for match in pattern_re.finditer(vba_code):
2205 2208 value = match.group()
2206   - if value not in found:
  2209 + exclude_pattern_found = False
  2210 + for url_exclude_pattern in EXCLUDE_URLS_PATTERNS:
  2211 + if value.startswith(url_exclude_pattern):
  2212 + exclude_pattern_found = True
  2213 + if value not in found and not exclude_pattern_found:
2207 2214 results.append((pattern_type + obf_text, value))
2208 2215 found.add(value)
2209 2216 return results
... ... @@ -2771,7 +2778,6 @@ class VBA_Parser(object):
2771 2778 log.info('Failed OLE parsing for file %r (%s)' % (self.filename, exc))
2772 2779 log.debug('Trace:', exc_info=True)
2773 2780  
2774   -
2775 2781 def open_openxml(self, _file):
2776 2782 """
2777 2783 Open an OpenXML file
... ... @@ -2789,9 +2795,23 @@ class VBA_Parser(object):
2789 2795 #TODO: if the zip file is encrypted, suggest to use the -z option, or try '-z infected' automatically
2790 2796 # check each file within the zip if it is an OLE file, by reading its magic:
2791 2797 for subfile in z.namelist():
  2798 + log.debug("subfile {}".format(subfile))
2792 2799 with z.open(subfile) as file_handle:
  2800 + found_ole = False
  2801 + xml_macrosheet_found = False
2793 2802 magic = file_handle.read(len(olefile.MAGIC))
2794   - if magic == olefile.MAGIC:
  2803 + if magic == olefile.MAGIC:
  2804 + found_ole = True
  2805 + # in case we did not find an OLE file, there could be a XLM macrosheet
  2806 + if not found_ole:
  2807 + macro_sheet_footer = b"</xm:macrosheet>"
  2808 + len_macro_sheet_footer = len(macro_sheet_footer)
  2809 + read_all_file = file_handle.read()
  2810 + last_bytes_to_check = read_all_file[-len_macro_sheet_footer:]
  2811 + if last_bytes_to_check == macro_sheet_footer:
  2812 + log.info("Found XLM Macro in subfile: {}".format(subfile))
  2813 + xml_macrosheet_found = True
  2814 + if found_ole or xml_macrosheet_found:
2795 2815 log.debug('Opening OLE file %s within zip' % subfile)
2796 2816 with z.open(subfile) as file_handle:
2797 2817 ole_data = file_handle.read()
... ... @@ -3181,6 +3201,7 @@ class VBA_Parser(object):
3181 3201  
3182 3202 :return: bool, True if at least one VBA project has been found, False otherwise
3183 3203 """
  3204 + log.debug("detect vba macros")
3184 3205 #TODO: return None or raise exception if format not supported
3185 3206 #TODO: return the number of VBA projects found instead of True/False?
3186 3207 # if this method was already called, return the previous result:
... ... @@ -3189,6 +3210,7 @@ class VBA_Parser(object):
3189 3210 # if OpenXML/PPT, check all the OLE subfiles:
3190 3211 if self.ole_file is None:
3191 3212 for ole_subfile in self.ole_subfiles:
  3213 + log.debug("ole subfile {}".format(ole_subfile))
3192 3214 ole_subfile.no_xlm = self.no_xlm
3193 3215 if ole_subfile.detect_vba_macros():
3194 3216 self.contains_macros = True
... ... @@ -3237,6 +3259,7 @@ class VBA_Parser(object):
3237 3259 return self.contains_macros
3238 3260  
3239 3261 def detect_xlm_macros(self):
  3262 + log.debug("detect xlm macros")
3240 3263 # if this is a SLK file, the analysis was done in open_slk:
3241 3264 if self.type == TYPE_SLK:
3242 3265 return self.contains_macros
... ...