Commit f6c210a13555da089f962540a8ff5a6724fe884d
1 parent: eb94f64e
added extraction of XLM macro from XLSM new format
Showing 1 changed file with 26 additions and 3 deletions
oletools/olevba.py
| ... | ... | @@ -837,6 +837,9 @@ URL_PATH = r'(?:/[a-zA-Z0-9\-\._\?\,\'/\\\+&%\$#\=~]*)?' # [^\.\,\)\(\s"] |
| 837 | 837 | URL_RE = SCHEME + r'\://' + SERVER_PORT + URL_PATH |
| 838 | 838 | re_url = re.compile(URL_RE) |
| 839 | 839 | |
| 840 | +EXCLUDE_URLS_PATTERNS = ["http://schemas.openxmlformats.org/", | |
| 841 | + "http://schemas.microsoft.com/", | |
| 842 | + ] | |
| 840 | 843 | |
| 841 | 844 | # Patterns to be extracted (IP addresses, URLs, etc) |
| 842 | 845 | # From patterns.py in balbuzard |
| ... | ... | @@ -2203,7 +2206,11 @@ def detect_patterns(vba_code, obfuscation=None): |
| 2203 | 2206 | for pattern_type, pattern_re in RE_PATTERNS: |
| 2204 | 2207 | for match in pattern_re.finditer(vba_code): |
| 2205 | 2208 | value = match.group() |
| 2206 | - if value not in found: | |
| 2209 | + exclude_pattern_found = False | |
| 2210 | + for url_exclude_pattern in EXCLUDE_URLS_PATTERNS: | |
| 2211 | + if value.startswith(url_exclude_pattern): | |
| 2212 | + exclude_pattern_found = True | |
| 2213 | + if value not in found and not exclude_pattern_found: | |
| 2207 | 2214 | results.append((pattern_type + obf_text, value)) |
| 2208 | 2215 | found.add(value) |
| 2209 | 2216 | return results |
| ... | ... | @@ -2771,7 +2778,6 @@ class VBA_Parser(object): |
| 2771 | 2778 | log.info('Failed OLE parsing for file %r (%s)' % (self.filename, exc)) |
| 2772 | 2779 | log.debug('Trace:', exc_info=True) |
| 2773 | 2780 | |
| 2774 | - | |
| 2775 | 2781 | def open_openxml(self, _file): |
| 2776 | 2782 | """ |
| 2777 | 2783 | Open an OpenXML file |
| ... | ... | @@ -2789,9 +2795,23 @@ class VBA_Parser(object): |
| 2789 | 2795 | #TODO: if the zip file is encrypted, suggest to use the -z option, or try '-z infected' automatically |
| 2790 | 2796 | # check each file within the zip if it is an OLE file, by reading its magic: |
| 2791 | 2797 | for subfile in z.namelist(): |
| 2798 | + log.debug("subfile {}".format(subfile)) | |
| 2792 | 2799 | with z.open(subfile) as file_handle: |
| 2800 | + found_ole = False | |
| 2801 | + xml_macrosheet_found = False | |
| 2793 | 2802 | magic = file_handle.read(len(olefile.MAGIC)) |
| 2794 | - if magic == olefile.MAGIC: | |
| 2803 | + if magic == olefile.MAGIC: | |
| 2804 | + found_ole = True | |
| 2805 | + # in case we did not find an OLE file, there could be a XLM macrosheet | |
| 2806 | + if not found_ole: | |
| 2807 | + macro_sheet_footer = b"</xm:macrosheet>" | |
| 2808 | + len_macro_sheet_footer = len(macro_sheet_footer) | |
| 2809 | + read_all_file = file_handle.read() | |
| 2810 | + last_bytes_to_check = read_all_file[-len_macro_sheet_footer:] | |
| 2811 | + if last_bytes_to_check == macro_sheet_footer: | |
| 2812 | + log.info("Found XLM Macro in subfile: {}".format(subfile)) | |
| 2813 | + xml_macrosheet_found = True | |
| 2814 | + if found_ole or xml_macrosheet_found: | |
| 2795 | 2815 | log.debug('Opening OLE file %s within zip' % subfile) |
| 2796 | 2816 | with z.open(subfile) as file_handle: |
| 2797 | 2817 | ole_data = file_handle.read() |
| ... | ... | @@ -3181,6 +3201,7 @@ class VBA_Parser(object): |
| 3181 | 3201 | |
| 3182 | 3202 | :return: bool, True if at least one VBA project has been found, False otherwise |
| 3183 | 3203 | """ |
| 3204 | + log.debug("detect vba macros") | |
| 3184 | 3205 | #TODO: return None or raise exception if format not supported |
| 3185 | 3206 | #TODO: return the number of VBA projects found instead of True/False? |
| 3186 | 3207 | # if this method was already called, return the previous result: |
| ... | ... | @@ -3189,6 +3210,7 @@ class VBA_Parser(object): |
| 3189 | 3210 | # if OpenXML/PPT, check all the OLE subfiles: |
| 3190 | 3211 | if self.ole_file is None: |
| 3191 | 3212 | for ole_subfile in self.ole_subfiles: |
| 3213 | + log.debug("ole subfile {}".format(ole_subfile)) | |
| 3192 | 3214 | ole_subfile.no_xlm = self.no_xlm |
| 3193 | 3215 | if ole_subfile.detect_vba_macros(): |
| 3194 | 3216 | self.contains_macros = True |
| ... | ... | @@ -3237,6 +3259,7 @@ class VBA_Parser(object): |
| 3237 | 3259 | return self.contains_macros |
| 3238 | 3260 | |
| 3239 | 3261 | def detect_xlm_macros(self): |
| 3262 | + log.debug("detect xlm macros") | |
| 3240 | 3263 | # if this is a SLK file, the analysis was done in open_slk: |
| 3241 | 3264 | if self.type == TYPE_SLK: |
| 3242 | 3265 | return self.contains_macros | ... | ... |