Commit 5e019d003fd462efc482e539aa040b5dd2fd52eb

Authored by Philippe Lagadec
1 parent 455c85b4

olevba: look for VBA code in any stream including orphans

Showing 1 changed file with 55 additions and 0 deletions
oletools/olevba.py
@@ -168,6 +168,7 @@ https://github.com/unixfreak0037/officeparser @@ -168,6 +168,7 @@ https://github.com/unixfreak0037/officeparser
168 # 2016-03-16 CH: - added option --no-deobfuscate (temporary) 168 # 2016-03-16 CH: - added option --no-deobfuscate (temporary)
169 # 2016-04-19 v0.46 PL: - new option --deobf instead of --no-deobfuscate 169 # 2016-04-19 v0.46 PL: - new option --deobf instead of --no-deobfuscate
170 # - updated suspicious keywords 170 # - updated suspicious keywords
  171 +# 2016-05-04 v0.47 PL: - look for VBA code in any stream including orphans
171 172
172 __version__ = '0.47' 173 __version__ = '0.47'
173 174
@@ -2322,6 +2323,29 @@ class VBA_Parser(object): @@ -2322,6 +2323,29 @@ class VBA_Parser(object):
2322 self.contains_macros = False 2323 self.contains_macros = False
2323 else: 2324 else:
2324 self.contains_macros = True 2325 self.contains_macros = True
  2326 + # Also look for VBA code in any stream including orphans
  2327 + # (happens in some malformed files)
  2328 + ole = self.ole_file
  2329 + for sid in xrange(len(ole.direntries)):
  2330 + # examine every directory entry in turn (this loop does not skip entries already seen):
  2331 + log.debug('Checking DirEntry #%d' % sid)
  2332 + d = ole.direntries[sid]
  2333 + if d is None:
  2334 + # this direntry is not part of the tree: either unused or an orphan
  2335 + d = ole._load_direntry(sid)
  2336 + log.debug('This DirEntry is an orphan or unused')
  2337 + if d.entry_type == olefile.STGTY_STREAM:
  2338 + # read data
  2339 + log.debug('Reading data from stream %r - size: %d bytes' % (d.name, d.size))
  2340 + try:
  2341 + data = ole._open(d.isectStart, d.size).read()
  2342 + log.debug('Read %d bytes' % len(data))
  2343 + log.debug(repr(data))
  2344 + if 'Attribut' in data:
  2345 + log.debug('Found VBA compressed code')
  2346 + self.contains_macros = True
  2347 + except:
  2348 + log.exception('Error when reading OLE Stream %r' % d.name)
2325 return self.contains_macros 2349 return self.contains_macros
2326 2350
2327 def extract_macros(self): 2351 def extract_macros(self):
@@ -2333,6 +2357,7 @@ class VBA_Parser(object): @@ -2333,6 +2357,7 @@ class VBA_Parser(object):
2333 If the file is OpenXML, filename is the path of the OLE subfile containing VBA macros 2357 If the file is OpenXML, filename is the path of the OLE subfile containing VBA macros
2334 within the zip archive, e.g. word/vbaProject.bin. 2358 within the zip archive, e.g. word/vbaProject.bin.
2335 """ 2359 """
  2360 + log.debug('extract_macros:')
2336 if self.ole_file is None: 2361 if self.ole_file is None:
2337 # This may be either an OpenXML or a text file: 2362 # This may be either an OpenXML or a text file:
2338 if self.type == TYPE_TEXT: 2363 if self.type == TYPE_TEXT:
@@ -2346,11 +2371,41 @@ class VBA_Parser(object): @@ -2346,11 +2371,41 @@ class VBA_Parser(object):
2346 else: 2371 else:
2347 # This is an OLE file: 2372 # This is an OLE file:
2348 self.find_vba_projects() 2373 self.find_vba_projects()
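  2374 + # set of direntry ids of the VBA streams extracted from the VBA projects, so that they are not extracted a second time below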
  2375 + vba_stream_ids = set()
2349 for vba_root, project_path, dir_path in self.vba_projects: 2376 for vba_root, project_path, dir_path in self.vba_projects:
2350 # extract all VBA macros from that VBA root storage: 2377 # extract all VBA macros from that VBA root storage:
2351 for stream_path, vba_filename, vba_code in _extract_vba(self.ole_file, vba_root, project_path, 2378 for stream_path, vba_filename, vba_code in _extract_vba(self.ole_file, vba_root, project_path,
2352 dir_path): 2379 dir_path):
  2380 + # store direntry ids in a set:
  2381 + vba_stream_ids.add(self.ole_file._find(stream_path))
2353 yield (self.filename, stream_path, vba_filename, vba_code) 2382 yield (self.filename, stream_path, vba_filename, vba_code)
  2383 + # Also look for VBA code in any stream including orphans
  2384 + # (happens in some malformed files)
  2385 + ole = self.ole_file
  2386 + for sid in xrange(len(ole.direntries)):
  2387 + # check if id is already done above:
  2388 + log.debug('Checking DirEntry #%d' % sid)
  2389 + if sid in vba_stream_ids:
  2390 + log.debug('Already extracted')
  2391 + continue
  2392 + d = ole.direntries[sid]
  2393 + if d is None:
  2394 + # this direntry is not part of the tree: either unused or an orphan
  2395 + d = ole._load_direntry(sid)
  2396 + log.debug('This DirEntry is an orphan or unused')
  2397 + if d.entry_type == olefile.STGTY_STREAM:
  2398 + # read data
  2399 + log.debug('Reading data from stream %r' % d.name)
  2400 + data = ole._open(d.isectStart, d.size).read()
  2401 + for match in re.finditer(r'\x00Attribut[^e]', data, flags=re.IGNORECASE):
  2402 + start = match.start() - 3
  2403 + log.debug('Found VBA compressed code at index %X' % start)
  2404 + compressed_code = data[start:]
  2405 + vba_code = decompress_stream(compressed_code)
  2406 + yield (self.filename, d.name, d.name, vba_code)
  2407 +
  2408 +
2354 2409
2355 2410
2356 def extract_all_macros(self): 2411 def extract_all_macros(self):