Commit 5e019d003fd462efc482e539aa040b5dd2fd52eb

Authored by Philippe Lagadec
1 parent 455c85b4

olevba: look for VBA code in any stream including orphans

Showing 1 changed file with 55 additions and 0 deletions
oletools/olevba.py
... ... @@ -168,6 +168,7 @@ https://github.com/unixfreak0037/officeparser
168 168 # 2016-03-16 CH: - added option --no-deobfuscate (temporary)
169 169 # 2016-04-19 v0.46 PL: - new option --deobf instead of --no-deobfuscate
170 170 # - updated suspicious keywords
  171 +# 2016-05-04 v0.47 PL: - look for VBA code in any stream including orphans
171 172  
172 173 __version__ = '0.47'
173 174  
... ... @@ -2322,6 +2323,29 @@ class VBA_Parser(object):
2322 2323 self.contains_macros = False
2323 2324 else:
2324 2325 self.contains_macros = True
  2326 + # Also look for VBA code in any stream including orphans
  2327 + # (happens in some malformed files)
  2328 + ole = self.ole_file
  2329 + for sid in xrange(len(ole.direntries)):
  2330 + # every directory entry is examined in this loop; none are skipped:
  2331 + log.debug('Checking DirEntry #%d' % sid)
  2332 + d = ole.direntries[sid]
  2333 + if d is None:
  2334 + # this direntry is not part of the tree: either unused or an orphan
  2335 + d = ole._load_direntry(sid)
  2336 + log.debug('This DirEntry is an orphan or unused')
  2337 + if d.entry_type == olefile.STGTY_STREAM:
  2338 + # read data
  2339 + log.debug('Reading data from stream %r - size: %d bytes' % (d.name, d.size))
  2340 + try:
  2341 + data = ole._open(d.isectStart, d.size).read()
  2342 + log.debug('Read %d bytes' % len(data))
  2343 + log.debug(repr(data))
  2344 + if 'Attribut' in data:
  2345 + log.debug('Found VBA compressed code')
  2346 + self.contains_macros = True
  2347 + except:
  2348 + log.exception('Error when reading OLE Stream %r' % d.name)
2325 2349 return self.contains_macros
2326 2350  
2327 2351 def extract_macros(self):
... ... @@ -2333,6 +2357,7 @@ class VBA_Parser(object):
2333 2357 If the file is OpenXML, filename is the path of the OLE subfile containing VBA macros
2334 2358 within the zip archive, e.g. word/vbaProject.bin.
2335 2359 """
  2360 + log.debug('extract_macros:')
2336 2361 if self.ole_file is None:
2337 2362 # This may be either an OpenXML or a text file:
2338 2363 if self.type == TYPE_TEXT:
... ... @@ -2346,11 +2371,41 @@ class VBA_Parser(object):
2346 2371 else:
2347 2372 # This is an OLE file:
2348 2373 self.find_vba_projects()
  2374 + # direntry ids of the VBA streams extracted below, so the orphan scan can skip them
  2375 + vba_stream_ids = set()
2349 2376 for vba_root, project_path, dir_path in self.vba_projects:
2350 2377 # extract all VBA macros from that VBA root storage:
2351 2378 for stream_path, vba_filename, vba_code in _extract_vba(self.ole_file, vba_root, project_path,
2352 2379 dir_path):
  2380 + # store direntry ids in a set:
  2381 + vba_stream_ids.add(self.ole_file._find(stream_path))
2353 2382 yield (self.filename, stream_path, vba_filename, vba_code)
  2383 + # Also look for VBA code in any stream including orphans
  2384 + # (happens in some malformed files)
  2385 + ole = self.ole_file
  2386 + for sid in xrange(len(ole.direntries)):
  2387 + # check if id is already done above:
  2388 + log.debug('Checking DirEntry #%d' % sid)
  2389 + if sid in vba_stream_ids:
  2390 + log.debug('Already extracted')
  2391 + continue
  2392 + d = ole.direntries[sid]
  2393 + if d is None:
  2394 + # this direntry is not part of the tree: either unused or an orphan
  2395 + d = ole._load_direntry(sid)
  2396 + log.debug('This DirEntry is an orphan or unused')
  2397 + if d.entry_type == olefile.STGTY_STREAM:
  2398 + # read data
  2399 + log.debug('Reading data from stream %r' % d.name)
  2400 + data = ole._open(d.isectStart, d.size).read()
  2401 + for match in re.finditer(r'\x00Attribut[^e]', data, flags=re.IGNORECASE):
  2402 + start = match.start() - 3
  2403 + log.debug('Found VBA compressed code at index %X' % start)
  2404 + compressed_code = data[start:]
  2405 + vba_code = decompress_stream(compressed_code)
  2406 + yield (self.filename, d.name, d.name, vba_code)
  2407 +
  2408 +
2354 2409  
2355 2410  
2356 2411 def extract_all_macros(self):
... ...