Commit b21c146505d6e6f83d2a2c0105a16da83791e142
1 parent: 44737a1c
continue integration of ppt into olevba: works now!
Showing 1 changed file with 66 additions and 32 deletions
oletools/olevba.py
| ... | ... | @@ -241,6 +241,7 @@ import thirdparty.olefile as olefile |
| 241 | 241 | from thirdparty.prettytable import prettytable |
| 242 | 242 | from thirdparty.xglob import xglob |
| 243 | 243 | from thirdparty.pyparsing.pyparsing import * |
| 244 | +import ppt_parser | |
| 244 | 245 | |
| 245 | 246 | # monkeypatch email to fix issue #32: |
| 246 | 247 | # allow header lines without ":" |
| ... | ... | @@ -1970,6 +1971,8 @@ class VBA_Parser(object): |
| 1970 | 1971 | if olefile.isOleFile(_file): |
| 1971 | 1972 | # This looks like an OLE file |
| 1972 | 1973 | self.open_ole(_file) |
| 1974 | + # if this worked, try whether it is a ppt file (special ole file) | |
| 1975 | + self.open_ppt() | |
| 1973 | 1976 | if self.type is None and zipfile.is_zipfile(_file): |
| 1974 | 1977 | # Zip file, which may be an OpenXML document |
| 1975 | 1978 | self.open_openxml(_file) |
| ... | ... | @@ -2184,29 +2187,44 @@ class VBA_Parser(object): |
| 2184 | 2187 | % (self.filename, MSG_OLEVBA_ISSUES)) |
| 2185 | 2188 | pass |
| 2186 | 2189 | |
| 2187 | - def open_ppt(self, ole): | |
| 2188 | - """ try to interpret ole file as PowerPoint 97-2003 using PptParser """ | |
| 2190 | + def open_ppt(self): | |
| 2191 | + """ try to interpret self.ole_file as PowerPoint 97-2003 using PptParser | |
| 2192 | + | |
| 2193 | + Although self.ole_file is a valid olefile.OleFileIO, we set | |
| 2194 | + self.ole_file = None in here and instead set self.ole_subfiles to the | |
| 2195 | + VBA ole streams found within the main ole file. That makes most of the | |
| 2196 | + code below treat this like an OpenXML file and only look at the | |
| 2197 | + ole_subfiles (except find_vba_* which needs to explicitly check for | |
| 2198 | + self.type) | |
| 2199 | + """ | |
| 2200 | + log.info('Check whether OLE file is PPT') | |
| 2201 | + ppt_parser.enable_logging() | |
| 2189 | 2202 | try: |
| 2190 | - ppt_parser = ppt_parser.PptParser(ole) | |
| 2191 | - n_infos = len(ppt_parser.search_vba_info()) | |
| 2192 | - storages = ppt_parser.search_vba_storage() | |
| 2203 | + ppt = ppt_parser.PptParser(self.ole_file, fast_fail=True) | |
| 2204 | + n_infos = len(ppt.search_vba_info()) | |
| 2205 | + storages = ppt.search_vba_storage() | |
| 2193 | 2206 | n_storages = len(storages) |
| 2194 | 2207 | log.debug('ppt: found {} infos and {} storages'.format(n_infos, |
| 2195 | 2208 | n_storages)) |
| 2196 | 2209 | if n_infos != n_storages: |
| 2197 | - log.warning('ppt: found different number of vba infos and storages!') | |
| 2210 | + # probably, some storages are ActiveX or other OLE types | |
| 2211 | + log.warning('ppt: found different number of vba infos ({} and ' | |
| 2212 | + 'storages ({}) --> subfiles might make trouble' | |
| 2213 | + .format(n_infos, n_storages)) | |
| 2198 | 2214 | for storage in storages: |
| 2199 | 2215 | if storage.is_compressed: |
| 2200 | - storage_decomp = self.ole_file.decompress_vba_storage(storage) | |
| 2216 | + storage_decomp = ppt.decompress_vba_storage(storage) | |
| 2201 | 2217 | else: |
| 2202 | 2218 | log.warning('just guessing here: decompressed storage = storage?') |
| 2203 | 2219 | storage_decomp = storage.read_all() # not implemented yet |
| 2204 | 2220 | self.ole_subfiles.append(VBA_Parser(None, storage_decomp, |
| 2205 | 2221 | container='PptParser')) |
| 2222 | + self.ole_file.close() # just in case | |
| 2206 | 2223 | self.ole_file = None # required to make other methods look at ole_subfiles |
| 2207 | 2224 | self.type = TYPE_PPT |
| 2208 | - except Exception: | |
| 2209 | - log.exception('Failed PPT parsing for file %r' % self.filename) | |
| 2225 | + except Exception as exc: | |
| 2226 | + log.debug("File appears not to be a ppt file (%s)") | |
| 2227 | + log.debug('Exception from opening attempt:', exc_info=True) | |
| 2210 | 2228 | |
| 2211 | 2229 | |
| 2212 | 2230 | def open_text(self, data): |
| ... | ... | @@ -2251,22 +2269,27 @@ class VBA_Parser(object): |
| 2251 | 2269 | """ |
| 2252 | 2270 | log.debug('VBA_Parser.find_vba_projects') |
| 2253 | 2271 | |
| 2254 | - # if this is a ppt file (PowerPoint 97-2003): | |
| 2255 | - # let ppt_parser do its job | |
| 2256 | - if self.type == TYPE_PPT: | |
| 2257 | - self.vba_projects = [] | |
| 2258 | - for subfile in self.ole_subfiles: | |
| 2259 | - self.vba_projects.extend(subfile.find_vba_projects()) | |
| 2260 | - return self.vba_projects | |
| 2261 | - | |
| 2262 | 2272 | # if the file is not OLE but OpenXML, return None: |
| 2263 | - if self.ole_file is None: | |
| 2273 | + if self.ole_file is None and self.type != TYPE_PPT: | |
| 2264 | 2274 | return None |
| 2265 | 2275 | |
| 2266 | 2276 | # if this method has already been called, return previous result: |
| 2267 | 2277 | if self.vba_projects is not None: |
| 2268 | 2278 | return self.vba_projects |
| 2269 | 2279 | |
| 2280 | + # if this is a ppt file (PowerPoint 97-2003): | |
| 2281 | + # self.ole_file is None but the ole_subfiles do contain vba_projects | |
| 2282 | + # (like for OpenXML files). | |
| 2283 | + if self.type == TYPE_PPT: | |
| 2284 | + # TODO: so far, this function is never called for PPT files, but | |
| 2285 | + # if that happens, the information is lost which ole file contains | |
| 2286 | + # which storage! | |
| 2287 | + log.warning('Returned info is not complete for PPT types!') | |
| 2288 | + self.vba_projects = [] | |
| 2289 | + for subfile in self.ole_subfiles: | |
| 2290 | + self.vba_projects.extend(subfile.find_vba_projects()) | |
| 2291 | + return self.vba_projects | |
| 2292 | + | |
| 2270 | 2293 | # Find the VBA project root (different in MS Word, Excel, etc): |
| 2271 | 2294 | # - Word 97-2003: Macros |
| 2272 | 2295 | # - Excel 97-2003: _VBA_PROJECT_CUR |
| ... | ... | @@ -2475,7 +2498,7 @@ class VBA_Parser(object): |
| 2475 | 2498 | """ |
| 2476 | 2499 | log.debug('VBA_Parser.find_vba_forms') |
| 2477 | 2500 | # if the file is not OLE but OpenXML, return None: |
| 2478 | - if self.ole_file is None: | |
| 2501 | + if self.ole_file is None and self.type != TYPE_PPT: | |
| 2479 | 2502 | return None |
| 2480 | 2503 | |
| 2481 | 2504 | # if this method has already been called, return previous result: |
| ... | ... | @@ -2494,21 +2517,32 @@ class VBA_Parser(object): |
| 2494 | 2517 | # The name of this stream (1) MUST be "o". |
| 2495 | 2518 | # - all names are case-insensitive |
| 2496 | 2519 | |
| 2520 | + if self.type == TYPE_PPT: | |
| 2521 | + # TODO: so far, this function is never called for PPT files, but | |
| 2522 | + # if that happens, the information is lost which ole file contains | |
| 2523 | + # which storage! | |
| 2524 | + ole_files = self.ole_subfiles | |
| 2525 | + log.warning('Returned info is not complete for PPT types!') | |
| 2526 | + else: | |
| 2527 | + ole_files = [self.ole_file, ] | |
| 2528 | + | |
| 2497 | 2529 | # start with an empty list: |
| 2498 | 2530 | self.vba_forms = [] |
| 2499 | - # Look for any storage containing those storage/streams: | |
| 2500 | - ole = self.ole_file | |
| 2501 | - for storage in ole.listdir(streams=False, storages=True): | |
| 2502 | - log.debug('Checking storage %r' % storage) | |
| 2503 | - # Look for two streams named 'o' and 'f': | |
| 2504 | - o_stream = storage + ['o'] | |
| 2505 | - f_stream = storage + ['f'] | |
| 2506 | - log.debug('Checking if streams %r and %r exist' % (f_stream, o_stream)) | |
| 2507 | - if ole.exists(o_stream) and ole.get_type(o_stream) == olefile.STGTY_STREAM \ | |
| 2508 | - and ole.exists(f_stream) and ole.get_type(f_stream) == olefile.STGTY_STREAM: | |
| 2509 | - form_path = '/'.join(storage) | |
| 2510 | - log.debug('Found VBA Form: %r' % form_path) | |
| 2511 | - self.vba_forms.append(storage) | |
| 2531 | + | |
| 2532 | + # Loop over ole streams | |
| 2533 | + for ole in ole_files: | |
| 2534 | + # Look for any storage containing those storage/streams: | |
| 2535 | + for storage in ole.listdir(streams=False, storages=True): | |
| 2536 | + log.debug('Checking storage %r' % storage) | |
| 2537 | + # Look for two streams named 'o' and 'f': | |
| 2538 | + o_stream = storage + ['o'] | |
| 2539 | + f_stream = storage + ['f'] | |
| 2540 | + log.debug('Checking if streams %r and %r exist' % (f_stream, o_stream)) | |
| 2541 | + if ole.exists(o_stream) and ole.get_type(o_stream) == olefile.STGTY_STREAM \ | |
| 2542 | + and ole.exists(f_stream) and ole.get_type(f_stream) == olefile.STGTY_STREAM: | |
| 2543 | + form_path = '/'.join(storage) | |
| 2544 | + log.debug('Found VBA Form: %r' % form_path) | |
| 2545 | + self.vba_forms.append(storage) | |
| 2512 | 2546 | return self.vba_forms |
| 2513 | 2547 | |
| 2514 | 2548 | def extract_form_strings(self): | ... | ... |