Commit 7f6a924d5bf7116ad1b860c8b66160d55bfb7097

Authored by Philippe Lagadec
2 parents db62635f e98b3ae0

olevba: merge changes to reveal API

oletools/doc/olevba.md
... ... @@ -323,6 +323,16 @@ Example:
323 323  
324 324 Alternatively, the VBA_Parser method **extract_all_macros** returns the same results as a list of tuples.
325 325  
  326 +### Extract Experimental Deobfuscated VBA Macro Source Code
  327 +
  328 +The method **reveal** extracts, decompresses, and deobfuscates VBA source code into a single string.
  329 +
  330 +Example:
  331 +
  332 + :::python
  333 + print vbaparser.reveal()
  334 +
  335 +
326 336 ### Analyze VBA Source Code
327 337  
328 338 Since version 0.40, the VBA_Parser class provides simpler methods than VBA_Scanner to analyze all macros contained
... ... @@ -498,4 +508,4 @@ python-oletools documentation
498 508 - [[oletimes]]
499 509 - [[olevba]]
500 510 - [[pyxswf]]
501   - - [[rtfobj]]
502 511 \ No newline at end of file
  512 + - [[rtfobj]]
... ...
oletools/olevba.py
... ... @@ -2187,7 +2187,23 @@ class VBA_Parser(object):
2187 2187 return self.analysis_results
2188 2188  
2189 2189  
2190   -
  2190 + def reveal(self):
  2191 + # we only want printable strings:
  2192 + analysis = self.analyze_macros(show_decoded_strings=False)
  2193 + # to avoid replacing short strings contained into longer strings, we sort the analysis results
  2194 + # based on the length of the encoded string, in reverse order:
  2195 + analysis = sorted(analysis, key=lambda type_decoded_encoded: len(type_decoded_encoded[2]), reverse=True)
  2196 + # normally now self.vba_code_all_modules contains source code from all modules
  2197 + deobf_code = self.vba_code_all_modules
  2198 + for kw_type, decoded, encoded in analysis:
  2199 + if kw_type == 'VBA string':
  2200 + #print '%3d occurences: %r => %r' % (deobf_code.count(encoded), encoded, decoded)
  2201 + # need to add double quotes around the decoded strings
  2202 + # after escaping double-quotes as double-double-quotes for VBA:
  2203 + decoded = decoded.replace('"', '""')
  2204 + deobf_code = deobf_code.replace(encoded, '"%s"' % decoded)
  2205 + return deobf_code
  2206 + #TODO: run the analysis several times if hex or base64 strings are revealed
2191 2207  
2192 2208  
2193 2209 def close(self):
... ... @@ -2263,28 +2279,6 @@ class VBA_Parser_CLI(VBA_Parser):
2263 2279 print 'No suspicious keyword or IOC found.'
2264 2280  
2265 2281  
2266   - def reveal(self):
2267   - #TODO: move this code to the VBA_Parser class (without print)
2268   - print 'MACRO SOURCE CODE WITH DEOBFUSCATED VBA STRINGS (EXPERIMENTAL):\n'
2269   - # we only want printable strings:
2270   - analysis = self.analyze_macros(show_decoded_strings=False)
2271   - # to avoid replacing short strings contained into longer strings, we sort the analysis results
2272   - # based on the length of the encoded string, in reverse order:
2273   - analysis = sorted(analysis, key=lambda type_decoded_encoded: len(type_decoded_encoded[2]), reverse=True)
2274   - # normally now self.vba_code_all_modules contains source code from all modules
2275   - deobf_code = self.vba_code_all_modules
2276   - for kw_type, decoded, encoded in analysis:
2277   - if kw_type == 'VBA string':
2278   - #print '%3d occurences: %r => %r' % (deobf_code.count(encoded), encoded, decoded)
2279   - # need to add double quotes around the decoded strings
2280   - # after escaping double-quotes as double-double-quotes for VBA:
2281   - decoded = decoded.replace('"', '""')
2282   - deobf_code = deobf_code.replace(encoded, '"%s"' % decoded)
2283   - print ''
2284   - print deobf_code
2285   - #TODO: repasser l'analyse plusieurs fois si des chaines hex ou base64 sont revelees
2286   -
2287   -
2288 2282 def process_file(self, show_decoded_strings=False,
2289 2283 display_code=True, global_analysis=True, hide_attributes=True,
2290 2284 vba_code_only=False, show_deobfuscated_code=False):
... ... @@ -2341,7 +2335,8 @@ class VBA_Parser_CLI(VBA_Parser):
2341 2335 # analyse the code from all modules at once:
2342 2336 self.print_analysis(show_decoded_strings)
2343 2337 if show_deobfuscated_code:
2344   - self.reveal()
  2338 + print 'MACRO SOURCE CODE WITH DEOBFUSCATED VBA STRINGS (EXPERIMENTAL):\n\n'
  2339 + print self.reveal()
2345 2340 else:
2346 2341 print 'No VBA macros found.'
2347 2342 except: #TypeError:
... ... @@ -2548,4 +2543,4 @@ def main():
2548 2543 if __name__ == '__main__':
2549 2544 main()
2550 2545  
2551   -# This was coded while listening to "Dust" from I Love You But I've Chosen Darkness
2552 2546 \ No newline at end of file
  2547 +# This was coded while listening to "Dust" from I Love You But I've Chosen Darkness
... ...