Commit 64f57029292e6dd169925dc5a874a5ef25319cf9

Authored by Philippe Lagadec
1 parent b23bfde7

olevba: added VBA Form strings extraction and analysis

Showing 1 changed file with 102 additions and 1 deletions
oletools/olevba.py
@@ -163,8 +163,9 @@ https://github.com/unixfreak0037/officeparser @@ -163,8 +163,9 @@ https://github.com/unixfreak0037/officeparser
163 # 2016-02-07 PL: - KeyboardInterrupt is now raised properly 163 # 2016-02-07 PL: - KeyboardInterrupt is now raised properly
164 # 2016-02-20 v0.43 PL: - fixed issue #34 in the VBA parser and vba_chr 164 # 2016-02-20 v0.43 PL: - fixed issue #34 in the VBA parser and vba_chr
165 # 2016-02-29 PL: - added Workbook_Activate to suspicious keywords 165 # 2016-02-29 PL: - added Workbook_Activate to suspicious keywords
  166 +# 2016-03-08 v0.44 PL: - added VBA Form strings extraction and analysis
166 167
167 -__version__ = '0.43' 168 +__version__ = '0.44'
168 169
169 #------------------------------------------------------------------------------ 170 #------------------------------------------------------------------------------
170 # TODO: 171 # TODO:
@@ -510,6 +511,9 @@ re_dridex_string = re.compile(r'"[0-9A-Za-z]{20,}"') @@ -510,6 +511,9 @@ re_dridex_string = re.compile(r'"[0-9A-Za-z]{20,}"')
510 # regex to check that it is not just a hex string: 511 # regex to check that it is not just a hex string:
511 re_nothex_check = re.compile(r'[G-Zg-z]') 512 re_nothex_check = re.compile(r'[G-Zg-z]')
512 513
  514 +# regex to extract printable strings (at least 5 chars) from VBA Forms:
  515 +re_printable_string = re.compile(r'[\t\r\n\x20-\xFF]{5,}')
  516 +
513 517
514 # === PARTIAL VBA GRAMMAR ==================================================== 518 # === PARTIAL VBA GRAMMAR ====================================================
515 519
@@ -1861,6 +1865,7 @@ class VBA_Parser(object): @@ -1861,6 +1865,7 @@ class VBA_Parser(object):
1861 self.container = container 1865 self.container = container
1862 self.type = None 1866 self.type = None
1863 self.vba_projects = None 1867 self.vba_projects = None
  1868 + self.vba_forms = None
1864 self.contains_macros = None # will be set to True or False by detect_macros 1869 self.contains_macros = None # will be set to True or False by detect_macros
1865 self.vba_code_all_modules = None # to store the source code of all modules 1870 self.vba_code_all_modules = None # to store the source code of all modules
1866 # list of tuples for each module: (subfilename, stream_path, vba_filename, vba_code) 1871 # list of tuples for each module: (subfilename, stream_path, vba_filename, vba_code)
@@ -2304,6 +2309,8 @@ class VBA_Parser(object): @@ -2304,6 +2309,8 @@ class VBA_Parser(object):
2304 for (subfilename, stream_path, vba_filename, vba_code) in self.extract_all_macros(): 2309 for (subfilename, stream_path, vba_filename, vba_code) in self.extract_all_macros():
2305 #TODO: filter code? (each module) 2310 #TODO: filter code? (each module)
2306 self.vba_code_all_modules += vba_code + '\n' 2311 self.vba_code_all_modules += vba_code + '\n'
  2312 + for (subfilename, form_path, form_string) in self.extract_form_strings():
  2313 + self.vba_code_all_modules += form_string + '\n'
2307 # Analyze the whole code at once: 2314 # Analyze the whole code at once:
2308 scanner = VBA_Scanner(self.vba_code_all_modules) 2315 scanner = VBA_Scanner(self.vba_code_all_modules)
2309 self.analysis_results = scanner.scan(show_decoded_strings) 2316 self.analysis_results = scanner.scan(show_decoded_strings)
@@ -2338,6 +2345,95 @@ class VBA_Parser(object): @@ -2338,6 +2345,95 @@ class VBA_Parser(object):
2338 #TODO: repasser l'analyse plusieurs fois si des chaines hex ou base64 sont revelees 2345 #TODO: repasser l'analyse plusieurs fois si des chaines hex ou base64 sont revelees
2339 2346
2340 2347
  2348 + def find_vba_forms(self):
  2349 + """
  2350 + Finds all the VBA forms stored in an OLE file.
  2351 +
  2352 + Return None if the file is not OLE but OpenXML.
  2353 + Return a list of tuples (vba_root, project_path, dir_path) for each VBA project.
  2354 + vba_root is the path of the root OLE storage containing the VBA project,
  2355 + including a trailing slash unless it is the root of the OLE file.
  2356 + project_path is the path of the OLE stream named "PROJECT" within the VBA project.
  2357 + dir_path is the path of the OLE stream named "VBA/dir" within the VBA project.
  2358 +
  2359 + If this function returns an empty list for one of the supported formats
  2360 + (i.e. Word, Excel, Powerpoint except Powerpoint 97-2003), then the
  2361 + file does not contain VBA macros.
  2362 +
  2363 + :return: None if OpenXML file, list of tuples (vba_root, project_path, dir_path)
  2364 + for each VBA project found if OLE file
  2365 + """
  2366 + log.debug('VBA_Parser.find_vba_forms')
  2367 + # if the file is not OLE but OpenXML, return None:
  2368 + if self.ole_file is None:
  2369 + return None
  2370 +
  2371 + # if this method has already been called, return previous result:
  2372 + # if self.vba_projects is not None:
  2373 + # return self.vba_projects
  2374 +
  2375 + # According to MS-OFORMS section 2.1.2 Control Streams:
  2376 + # - A parent control, that is, a control that can contain embedded controls,
  2377 + # MUST be persisted as a storage that contains multiple streams.
  2378 + # - All parent controls MUST contain a FormControl. The FormControl
  2379 + # properties are persisted to a stream (1) as specified in section 2.1.1.2.
  2380 + # The name of this stream (1) MUST be "f".
  2381 + # - Embedded controls that cannot themselves contain other embedded
  2382 + # controls are persisted sequentially as FormEmbeddedActiveXControls
  2383 + # to a stream (1) contained in the same storage as the parent control.
  2384 + # The name of this stream (1) MUST be "o".
  2385 + # - all names are case-insensitive
  2386 +
  2387 + # start with an empty list:
  2388 + self.vba_forms = []
  2389 + # Look for any storage containing those storage/streams:
  2390 + ole = self.ole_file
  2391 + for storage in ole.listdir(streams=False, storages=True):
  2392 + log.debug('Checking storage %r' % storage)
  2393 + # Look for two streams named 'o' and 'f':
  2394 + o_stream = storage + ['o']
  2395 + f_stream = storage + ['f']
  2396 + log.debug('Checking if streams %r and %r exist' % (f_stream, o_stream))
  2397 + if ole.exists(o_stream) and ole.get_type(o_stream) == olefile.STGTY_STREAM \
  2398 + and ole.exists(f_stream) and ole.get_type(f_stream) == olefile.STGTY_STREAM:
  2399 + form_path = '/'.join(storage)
  2400 + log.debug('Found VBA Form: %r' % form_path)
  2401 + self.vba_forms.append(storage)
  2402 + return self.vba_forms
  2403 +
  2404 + def extract_form_strings(self):
  2405 + """
  2406 + Extract printable strings from each VBA Form found in the file
  2407 +
  2408 + Iterator: yields (filename, stream_path, vba_filename, vba_code) for each VBA macro found
  2409 + If the file is OLE, filename is the path of the file.
  2410 + If the file is OpenXML, filename is the path of the OLE subfile containing VBA macros
  2411 + within the zip archive, e.g. word/vbaProject.bin.
  2412 + """
  2413 + if self.ole_file is None:
  2414 + # This may be either an OpenXML or a text file:
  2415 + if self.type == TYPE_TEXT:
  2416 + # This is a text file, return no results:
  2417 + return
  2418 + else:
  2419 + # OpenXML: recursively yield results from each OLE subfile:
  2420 + for ole_subfile in self.ole_subfiles:
  2421 + for results in ole_subfile.extract_form_strings():
  2422 + yield results
  2423 + else:
  2424 + # This is an OLE file:
  2425 + self.find_vba_forms()
  2426 + ole = self.ole_file
  2427 + for form_storage in self.vba_forms:
  2428 + o_stream = form_storage + ['o']
  2429 + log.debug('Opening form object stream %r' % '/'.join(o_stream))
  2430 + form_data = ole.openstream(o_stream).read()
  2431 + # Extract printable strings from the form object stream "o":
  2432 + for m in re_printable_string.finditer(form_data):
  2433 + log.debug('Printable string found in form: %r' % m.group())
  2434 + yield (self.filename, '/'.join(o_stream), m.group())
  2435 +
  2436 +
2341 def close(self): 2437 def close(self):
2342 """ 2438 """
2343 Close all the open files. This method must be called after usage, if 2439 Close all the open files. This method must be called after usage, if
@@ -2463,6 +2559,11 @@ class VBA_Parser_CLI(VBA_Parser): @@ -2463,6 +2559,11 @@ class VBA_Parser_CLI(VBA_Parser):
2463 print 'ANALYSIS:' 2559 print 'ANALYSIS:'
2464 # analyse each module's code, filtered to avoid false positives: 2560 # analyse each module's code, filtered to avoid false positives:
2465 self.print_analysis(show_decoded_strings) 2561 self.print_analysis(show_decoded_strings)
  2562 + for (subfilename, stream_path, form_string) in self.extract_form_strings():
  2563 + print '-' * 79
  2564 + print 'VBA FORM STRING IN %r - OLE stream: %r' % (subfilename, stream_path)
  2565 + print '- ' * 39
  2566 + print form_string
2466 if global_analysis and not vba_code_only: 2567 if global_analysis and not vba_code_only:
2467 # analyse the code from all modules at once: 2568 # analyse the code from all modules at once:
2468 self.print_analysis(show_decoded_strings) 2569 self.print_analysis(show_decoded_strings)