Commit b21c146505d6e6f83d2a2c0105a16da83791e142

Authored by Christian Herdtweck
1 parent 44737a1c

continue integration of ppt into olevba: works now!

Showing 1 changed file with 66 additions and 32 deletions
oletools/olevba.py
@@ -241,6 +241,7 @@ import thirdparty.olefile as olefile @@ -241,6 +241,7 @@ import thirdparty.olefile as olefile
241 from thirdparty.prettytable import prettytable 241 from thirdparty.prettytable import prettytable
242 from thirdparty.xglob import xglob 242 from thirdparty.xglob import xglob
243 from thirdparty.pyparsing.pyparsing import * 243 from thirdparty.pyparsing.pyparsing import *
  244 +import ppt_parser
244 245
245 # monkeypatch email to fix issue #32: 246 # monkeypatch email to fix issue #32:
246 # allow header lines without ":" 247 # allow header lines without ":"
@@ -1970,6 +1971,8 @@ class VBA_Parser(object): @@ -1970,6 +1971,8 @@ class VBA_Parser(object):
1970 if olefile.isOleFile(_file): 1971 if olefile.isOleFile(_file):
1971 # This looks like an OLE file 1972 # This looks like an OLE file
1972 self.open_ole(_file) 1973 self.open_ole(_file)
  1974 + # if this worked, try whether it is a ppt file (special ole file)
  1975 + self.open_ppt()
1973 if self.type is None and zipfile.is_zipfile(_file): 1976 if self.type is None and zipfile.is_zipfile(_file):
1974 # Zip file, which may be an OpenXML document 1977 # Zip file, which may be an OpenXML document
1975 self.open_openxml(_file) 1978 self.open_openxml(_file)
@@ -2184,29 +2187,44 @@ class VBA_Parser(object): @@ -2184,29 +2187,44 @@ class VBA_Parser(object):
2184 % (self.filename, MSG_OLEVBA_ISSUES)) 2187 % (self.filename, MSG_OLEVBA_ISSUES))
2185 pass 2188 pass
2186 2189
2187 - def open_ppt(self, ole):  
2188 - """ try to interpret ole file as PowerPoint 97-2003 using PptParser """ 2190 + def open_ppt(self):
  2191 + """ try to interpret self.ole_file as PowerPoint 97-2003 using PptParser
  2192 +
  2193 + Although self.ole_file is a valid olefile.OleFileIO, we set
  2194 + self.ole_file = None in here and instead set self.ole_subfiles to the
  2195 + VBA ole streams found within the main ole file. That makes most of the
  2196 + code below treat this like an OpenXML file and only look at the
  2197 + ole_subfiles (except find_vba_* which needs to explicitly check for
  2198 + self.type)
  2199 + """
  2200 + log.info('Check whether OLE file is PPT')
  2201 + ppt_parser.enable_logging()
2189 try: 2202 try:
2190 - ppt_parser = ppt_parser.PptParser(ole)  
2191 - n_infos = len(ppt_parser.search_vba_info())  
2192 - storages = ppt_parser.search_vba_storage() 2203 + ppt = ppt_parser.PptParser(self.ole_file, fast_fail=True)
  2204 + n_infos = len(ppt.search_vba_info())
  2205 + storages = ppt.search_vba_storage()
2193 n_storages = len(storages) 2206 n_storages = len(storages)
2194 log.debug('ppt: found {} infos and {} storages'.format(n_infos, 2207 log.debug('ppt: found {} infos and {} storages'.format(n_infos,
2195 n_storages)) 2208 n_storages))
2196 if n_infos != n_storages: 2209 if n_infos != n_storages:
2197 - log.warning('ppt: found different number of vba infos and storages!') 2210 + # probably, some storages are ActiveX or other OLE types
  2211 + log.warning('ppt: found different number of vba infos ({} and '
  2212 + 'storages ({}) --> subfiles might make trouble'
  2213 + .format(n_infos, n_storages))
2198 for storage in storages: 2214 for storage in storages:
2199 if storage.is_compressed: 2215 if storage.is_compressed:
2200 - storage_decomp = self.ole_file.decompress_vba_storage(storage) 2216 + storage_decomp = ppt.decompress_vba_storage(storage)
2201 else: 2217 else:
2202 log.warning('just guessing here: decompressed storage = storage?') 2218 log.warning('just guessing here: decompressed storage = storage?')
2203 storage_decomp = storage.read_all() # not implemented yet 2219 storage_decomp = storage.read_all() # not implemented yet
2204 self.ole_subfiles.append(VBA_Parser(None, storage_decomp, 2220 self.ole_subfiles.append(VBA_Parser(None, storage_decomp,
2205 container='PptParser')) 2221 container='PptParser'))
  2222 + self.ole_file.close() # just in case
2206 self.ole_file = None # required to make other methods look at ole_subfiles 2223 self.ole_file = None # required to make other methods look at ole_subfiles
2207 self.type = TYPE_PPT 2224 self.type = TYPE_PPT
2208 - except Exception:  
2209 - log.exception('Failed PPT parsing for file %r' % self.filename) 2225 + except Exception as exc:
  2226 + log.debug("File appears not to be a ppt file (%s)")
  2227 + log.debug('Exception from opening attempt:', exc_info=True)
2210 2228
2211 2229
2212 def open_text(self, data): 2230 def open_text(self, data):
@@ -2251,22 +2269,27 @@ class VBA_Parser(object): @@ -2251,22 +2269,27 @@ class VBA_Parser(object):
2251 """ 2269 """
2252 log.debug('VBA_Parser.find_vba_projects') 2270 log.debug('VBA_Parser.find_vba_projects')
2253 2271
2254 - # if this is a ppt file (PowerPoint 97-2003):  
2255 - # let ppt_parser do its job  
2256 - if self.type == TYPE_PPT:  
2257 - self.vba_projects = []  
2258 - for subfile in self.ole_subfiles:  
2259 - self.vba_projects.extend(subfile.find_vba_projects())  
2260 - return self.vba_projects  
2261 -  
2262 # if the file is not OLE but OpenXML, return None: 2272 # if the file is not OLE but OpenXML, return None:
2263 - if self.ole_file is None: 2273 + if self.ole_file is None and self.type != TYPE_PPT:
2264 return None 2274 return None
2265 2275
2266 # if this method has already been called, return previous result: 2276 # if this method has already been called, return previous result:
2267 if self.vba_projects is not None: 2277 if self.vba_projects is not None:
2268 return self.vba_projects 2278 return self.vba_projects
2269 2279
  2280 + # if this is a ppt file (PowerPoint 97-2003):
  2281 + # self.ole_file is None but the ole_subfiles do contain vba_projects
  2282 + # (like for OpenXML files).
  2283 + if self.type == TYPE_PPT:
  2284 + # TODO: so far, this function is never called for PPT files; if it
  2285 + # ever is, the information about which ole file contains which
  2286 + # storage is lost!
  2287 + log.warning('Returned info is not complete for PPT types!')
  2288 + self.vba_projects = []
  2289 + for subfile in self.ole_subfiles:
  2290 + self.vba_projects.extend(subfile.find_vba_projects())
  2291 + return self.vba_projects
  2292 +
2270 # Find the VBA project root (different in MS Word, Excel, etc): 2293 # Find the VBA project root (different in MS Word, Excel, etc):
2271 # - Word 97-2003: Macros 2294 # - Word 97-2003: Macros
2272 # - Excel 97-2003: _VBA_PROJECT_CUR 2295 # - Excel 97-2003: _VBA_PROJECT_CUR
@@ -2475,7 +2498,7 @@ class VBA_Parser(object): @@ -2475,7 +2498,7 @@ class VBA_Parser(object):
2475 """ 2498 """
2476 log.debug('VBA_Parser.find_vba_forms') 2499 log.debug('VBA_Parser.find_vba_forms')
2477 # if the file is not OLE but OpenXML, return None: 2500 # if the file is not OLE but OpenXML, return None:
2478 - if self.ole_file is None: 2501 + if self.ole_file is None and self.type != TYPE_PPT:
2479 return None 2502 return None
2480 2503
2481 # if this method has already been called, return previous result: 2504 # if this method has already been called, return previous result:
@@ -2494,21 +2517,32 @@ class VBA_Parser(object): @@ -2494,21 +2517,32 @@ class VBA_Parser(object):
2494 # The name of this stream (1) MUST be "o". 2517 # The name of this stream (1) MUST be "o".
2495 # - all names are case-insensitive 2518 # - all names are case-insensitive
2496 2519
  2520 + if self.type == TYPE_PPT:
  2521 + # TODO: so far, this function is never called for PPT files; if it
  2522 + # ever is, the information about which ole file contains which
  2523 + # storage is lost!
  2524 + ole_files = self.ole_subfiles
  2525 + log.warning('Returned info is not complete for PPT types!')
  2526 + else:
  2527 + ole_files = [self.ole_file, ]
  2528 +
2497 # start with an empty list: 2529 # start with an empty list:
2498 self.vba_forms = [] 2530 self.vba_forms = []
2499 - # Look for any storage containing those storage/streams:  
2500 - ole = self.ole_file  
2501 - for storage in ole.listdir(streams=False, storages=True):  
2502 - log.debug('Checking storage %r' % storage)  
2503 - # Look for two streams named 'o' and 'f':  
2504 - o_stream = storage + ['o']  
2505 - f_stream = storage + ['f']  
2506 - log.debug('Checking if streams %r and %r exist' % (f_stream, o_stream))  
2507 - if ole.exists(o_stream) and ole.get_type(o_stream) == olefile.STGTY_STREAM \  
2508 - and ole.exists(f_stream) and ole.get_type(f_stream) == olefile.STGTY_STREAM:  
2509 - form_path = '/'.join(storage)  
2510 - log.debug('Found VBA Form: %r' % form_path)  
2511 - self.vba_forms.append(storage) 2531 +
  2532 + # Loop over ole files
  2533 + for ole in ole_files:
  2534 + # Look for any storage containing those storage/streams:
  2535 + for storage in ole.listdir(streams=False, storages=True):
  2536 + log.debug('Checking storage %r' % storage)
  2537 + # Look for two streams named 'o' and 'f':
  2538 + o_stream = storage + ['o']
  2539 + f_stream = storage + ['f']
  2540 + log.debug('Checking if streams %r and %r exist' % (f_stream, o_stream))
  2541 + if ole.exists(o_stream) and ole.get_type(o_stream) == olefile.STGTY_STREAM \
  2542 + and ole.exists(f_stream) and ole.get_type(f_stream) == olefile.STGTY_STREAM:
  2543 + form_path = '/'.join(storage)
  2544 + log.debug('Found VBA Form: %r' % form_path)
  2545 + self.vba_forms.append(storage)
2512 return self.vba_forms 2546 return self.vba_forms
2513 2547
2514 def extract_form_strings(self): 2548 def extract_form_strings(self):