Commit 5e019d003fd462efc482e539aa040b5dd2fd52eb
1 parent
455c85b4
olevba: look for VBA code in any stream including orphans
Showing 1 changed file with 55 additions and 0 deletions
oletools/olevba.py
| ... | ... | @@ -168,6 +168,7 @@ https://github.com/unixfreak0037/officeparser |
| 168 | 168 | # 2016-03-16 CH: - added option --no-deobfuscate (temporary) |
| 169 | 169 | # 2016-04-19 v0.46 PL: - new option --deobf instead of --no-deobfuscate |
| 170 | 170 | # - updated suspicious keywords |
| 171 | +# 2016-05-04 v0.47 PL: - look for VBA code in any stream including orphans | |
| 171 | 172 | |
| 172 | 173 | __version__ = '0.47' |
| 173 | 174 | |
| ... | ... | @@ -2322,6 +2323,29 @@ class VBA_Parser(object): |
| 2322 | 2323 | self.contains_macros = False |
| 2323 | 2324 | else: |
| 2324 | 2325 | self.contains_macros = True |
| 2326 | + # Also look for VBA code in any stream including orphans | |
| 2327 | + # (happens in some malformed files) | |
| 2328 | + ole = self.ole_file | |
| 2329 | + for sid in xrange(len(ole.direntries)): | |
| 2330 | + # examine every directory entry, including orphans and unused ones: | |
| 2331 | + log.debug('Checking DirEntry #%d' % sid) | |
| 2332 | + d = ole.direntries[sid] | |
| 2333 | + if d is None: | |
| 2334 | + # this direntry is not part of the tree: either unused or an orphan | |
| 2335 | + d = ole._load_direntry(sid) | |
| 2336 | + log.debug('This DirEntry is an orphan or unused') | |
| 2337 | + if d.entry_type == olefile.STGTY_STREAM: | |
| 2338 | + # read data | |
| 2339 | + log.debug('Reading data from stream %r - size: %d bytes' % (d.name, d.size)) | |
| 2340 | + try: | |
| 2341 | + data = ole._open(d.isectStart, d.size).read() | |
| 2342 | + log.debug('Read %d bytes' % len(data)) | |
| 2343 | + log.debug(repr(data)) | |
| 2344 | + if 'Attribut' in data: | |
| 2345 | + log.debug('Found VBA compressed code') | |
| 2346 | + self.contains_macros = True | |
| 2347 | + except: | |
| 2348 | + log.exception('Error when reading OLE Stream %r' % d.name) | |
| 2325 | 2349 | return self.contains_macros |
| 2326 | 2350 | |
| 2327 | 2351 | def extract_macros(self): |
| ... | ... | @@ -2333,6 +2357,7 @@ class VBA_Parser(object): |
| 2333 | 2357 | If the file is OpenXML, filename is the path of the OLE subfile containing VBA macros |
| 2334 | 2358 | within the zip archive, e.g. word/vbaProject.bin. |
| 2335 | 2359 | """ |
| 2360 | + log.debug('extract_macros:') | |
| 2336 | 2361 | if self.ole_file is None: |
| 2337 | 2362 | # This may be either an OpenXML or a text file: |
| 2338 | 2363 | if self.type == TYPE_TEXT: |
| ... | ... | @@ -2346,11 +2371,41 @@ class VBA_Parser(object): |
| 2346 | 2371 | else: |
| 2347 | 2372 | # This is an OLE file: |
| 2348 | 2373 | self.find_vba_projects() |
| 2374 | + # set of stream ids | |
| 2375 | + vba_stream_ids = set() | |
| 2349 | 2376 | for vba_root, project_path, dir_path in self.vba_projects: |
| 2350 | 2377 | # extract all VBA macros from that VBA root storage: |
| 2351 | 2378 | for stream_path, vba_filename, vba_code in _extract_vba(self.ole_file, vba_root, project_path, |
| 2352 | 2379 | dir_path): |
| 2380 | + # store direntry ids in a set: | |
| 2381 | + vba_stream_ids.add(self.ole_file._find(stream_path)) | |
| 2353 | 2382 | yield (self.filename, stream_path, vba_filename, vba_code) |
| 2383 | + # Also look for VBA code in any stream including orphans | |
| 2384 | + # (happens in some malformed files) | |
| 2385 | + ole = self.ole_file | |
| 2386 | + for sid in xrange(len(ole.direntries)): | |
| 2387 | + # check if id is already done above: | |
| 2388 | + log.debug('Checking DirEntry #%d' % sid) | |
| 2389 | + if sid in vba_stream_ids: | |
| 2390 | + log.debug('Already extracted') | |
| 2391 | + continue | |
| 2392 | + d = ole.direntries[sid] | |
| 2393 | + if d is None: | |
| 2394 | + # this direntry is not part of the tree: either unused or an orphan | |
| 2395 | + d = ole._load_direntry(sid) | |
| 2396 | + log.debug('This DirEntry is an orphan or unused') | |
| 2397 | + if d.entry_type == olefile.STGTY_STREAM: | |
| 2398 | + # read data | |
| 2399 | + log.debug('Reading data from stream %r' % d.name) | |
| 2400 | + data = ole._open(d.isectStart, d.size).read() | |
| 2401 | + for match in re.finditer(r'\x00Attribut[^e]', data, flags=re.IGNORECASE): | |
| 2402 | + start = match.start() - 3 | |
| 2403 | + log.debug('Found VBA compressed code at index %X' % start) | |
| 2404 | + compressed_code = data[start:] | |
| 2405 | + vba_code = decompress_stream(compressed_code) | |
| 2406 | + yield (self.filename, d.name, d.name, vba_code) | |
| 2407 | + | |
| 2408 | + | |
| 2354 | 2409 | |
| 2355 | 2410 | |
| 2356 | 2411 | def extract_all_macros(self): | ... | ... |