Commit f6c210a13555da089f962540a8ff5a6724fe884d
1 parent: eb94f64e
Added extraction of XLM macros from the new XLSM format
Showing 1 changed file with 26 additions and 3 deletions
oletools/olevba.py
| @@ -837,6 +837,9 @@ URL_PATH = r'(?:/[a-zA-Z0-9\-\._\?\,\'/\\\+&%\$#\=~]*)?' # [^\.\,\)\(\s"] | @@ -837,6 +837,9 @@ URL_PATH = r'(?:/[a-zA-Z0-9\-\._\?\,\'/\\\+&%\$#\=~]*)?' # [^\.\,\)\(\s"] | ||
| 837 | URL_RE = SCHEME + r'\://' + SERVER_PORT + URL_PATH | 837 | URL_RE = SCHEME + r'\://' + SERVER_PORT + URL_PATH |
| 838 | re_url = re.compile(URL_RE) | 838 | re_url = re.compile(URL_RE) |
| 839 | 839 | ||
| 840 | +EXCLUDE_URLS_PATTERNS = ["http://schemas.openxmlformats.org/", | ||
| 841 | + "http://schemas.microsoft.com/", | ||
| 842 | + ] | ||
| 840 | 843 | ||
| 841 | # Patterns to be extracted (IP addresses, URLs, etc) | 844 | # Patterns to be extracted (IP addresses, URLs, etc) |
| 842 | # From patterns.py in balbuzard | 845 | # From patterns.py in balbuzard |
| @@ -2203,7 +2206,11 @@ def detect_patterns(vba_code, obfuscation=None): | @@ -2203,7 +2206,11 @@ def detect_patterns(vba_code, obfuscation=None): | ||
| 2203 | for pattern_type, pattern_re in RE_PATTERNS: | 2206 | for pattern_type, pattern_re in RE_PATTERNS: |
| 2204 | for match in pattern_re.finditer(vba_code): | 2207 | for match in pattern_re.finditer(vba_code): |
| 2205 | value = match.group() | 2208 | value = match.group() |
| 2206 | - if value not in found: | 2209 | + exclude_pattern_found = False |
| 2210 | + for url_exclude_pattern in EXCLUDE_URLS_PATTERNS: | ||
| 2211 | + if value.startswith(url_exclude_pattern): | ||
| 2212 | + exclude_pattern_found = True | ||
| 2213 | + if value not in found and not exclude_pattern_found: | ||
| 2207 | results.append((pattern_type + obf_text, value)) | 2214 | results.append((pattern_type + obf_text, value)) |
| 2208 | found.add(value) | 2215 | found.add(value) |
| 2209 | return results | 2216 | return results |
| @@ -2771,7 +2778,6 @@ class VBA_Parser(object): | @@ -2771,7 +2778,6 @@ class VBA_Parser(object): | ||
| 2771 | log.info('Failed OLE parsing for file %r (%s)' % (self.filename, exc)) | 2778 | log.info('Failed OLE parsing for file %r (%s)' % (self.filename, exc)) |
| 2772 | log.debug('Trace:', exc_info=True) | 2779 | log.debug('Trace:', exc_info=True) |
| 2773 | 2780 | ||
| 2774 | - | ||
| 2775 | def open_openxml(self, _file): | 2781 | def open_openxml(self, _file): |
| 2776 | """ | 2782 | """ |
| 2777 | Open an OpenXML file | 2783 | Open an OpenXML file |
| @@ -2789,9 +2795,23 @@ class VBA_Parser(object): | @@ -2789,9 +2795,23 @@ class VBA_Parser(object): | ||
| 2789 | #TODO: if the zip file is encrypted, suggest to use the -z option, or try '-z infected' automatically | 2795 | #TODO: if the zip file is encrypted, suggest to use the -z option, or try '-z infected' automatically |
| 2790 | # check each file within the zip if it is an OLE file, by reading its magic: | 2796 | # check each file within the zip if it is an OLE file, by reading its magic: |
| 2791 | for subfile in z.namelist(): | 2797 | for subfile in z.namelist(): |
| 2798 | + log.debug("subfile {}".format(subfile)) | ||
| 2792 | with z.open(subfile) as file_handle: | 2799 | with z.open(subfile) as file_handle: |
| 2800 | + found_ole = False | ||
| 2801 | + xml_macrosheet_found = False | ||
| 2793 | magic = file_handle.read(len(olefile.MAGIC)) | 2802 | magic = file_handle.read(len(olefile.MAGIC)) |
| 2794 | - if magic == olefile.MAGIC: | 2803 | + if magic == olefile.MAGIC: |
| 2804 | + found_ole = True | ||
| 2805 | + # in case we did not find an OLE file, there could be a XLM macrosheet | ||
| 2806 | + if not found_ole: | ||
| 2807 | + macro_sheet_footer = b"</xm:macrosheet>" | ||
| 2808 | + len_macro_sheet_footer = len(macro_sheet_footer) | ||
| 2809 | + read_all_file = file_handle.read() | ||
| 2810 | + last_bytes_to_check = read_all_file[-len_macro_sheet_footer:] | ||
| 2811 | + if last_bytes_to_check == macro_sheet_footer: | ||
| 2812 | + log.info("Found XLM Macro in subfile: {}".format(subfile)) | ||
| 2813 | + xml_macrosheet_found = True | ||
| 2814 | + if found_ole or xml_macrosheet_found: | ||
| 2795 | log.debug('Opening OLE file %s within zip' % subfile) | 2815 | log.debug('Opening OLE file %s within zip' % subfile) |
| 2796 | with z.open(subfile) as file_handle: | 2816 | with z.open(subfile) as file_handle: |
| 2797 | ole_data = file_handle.read() | 2817 | ole_data = file_handle.read() |
| @@ -3181,6 +3201,7 @@ class VBA_Parser(object): | @@ -3181,6 +3201,7 @@ class VBA_Parser(object): | ||
| 3181 | 3201 | ||
| 3182 | :return: bool, True if at least one VBA project has been found, False otherwise | 3202 | :return: bool, True if at least one VBA project has been found, False otherwise |
| 3183 | """ | 3203 | """ |
| 3204 | + log.debug("detect vba macros") | ||
| 3184 | #TODO: return None or raise exception if format not supported | 3205 | #TODO: return None or raise exception if format not supported |
| 3185 | #TODO: return the number of VBA projects found instead of True/False? | 3206 | #TODO: return the number of VBA projects found instead of True/False? |
| 3186 | # if this method was already called, return the previous result: | 3207 | # if this method was already called, return the previous result: |
| @@ -3189,6 +3210,7 @@ class VBA_Parser(object): | @@ -3189,6 +3210,7 @@ class VBA_Parser(object): | ||
| 3189 | # if OpenXML/PPT, check all the OLE subfiles: | 3210 | # if OpenXML/PPT, check all the OLE subfiles: |
| 3190 | if self.ole_file is None: | 3211 | if self.ole_file is None: |
| 3191 | for ole_subfile in self.ole_subfiles: | 3212 | for ole_subfile in self.ole_subfiles: |
| 3213 | + log.debug("ole subfile {}".format(ole_subfile)) | ||
| 3192 | ole_subfile.no_xlm = self.no_xlm | 3214 | ole_subfile.no_xlm = self.no_xlm |
| 3193 | if ole_subfile.detect_vba_macros(): | 3215 | if ole_subfile.detect_vba_macros(): |
| 3194 | self.contains_macros = True | 3216 | self.contains_macros = True |
| @@ -3237,6 +3259,7 @@ class VBA_Parser(object): | @@ -3237,6 +3259,7 @@ class VBA_Parser(object): | ||
| 3237 | return self.contains_macros | 3259 | return self.contains_macros |
| 3238 | 3260 | ||
| 3239 | def detect_xlm_macros(self): | 3261 | def detect_xlm_macros(self): |
| 3262 | + log.debug("detect xlm macros") | ||
| 3240 | # if this is a SLK file, the analysis was done in open_slk: | 3263 | # if this is a SLK file, the analysis was done in open_slk: |
| 3241 | if self.type == TYPE_SLK: | 3264 | if self.type == TYPE_SLK: |
| 3242 | return self.contains_macros | 3265 | return self.contains_macros |