Commit 7f6a924d5bf7116ad1b860c8b66160d55bfb7097

Authored by Philippe Lagadec
2 parents db62635f e98b3ae0

olevba: merge changes to reveal API

oletools/doc/olevba.md
@@ -323,6 +323,16 @@ Example: @@ -323,6 +323,16 @@ Example:
323 323
324 Alternatively, the VBA_Parser method **extract_all_macros** returns the same results as a list of tuples. 324 Alternatively, the VBA_Parser method **extract_all_macros** returns the same results as a list of tuples.
325 325
  326 +### Extract Experimental Deobfuscated VBA Macro Source Code
  327 +
  328 +The method **reveal** extracts, decompresses, and deobfuscates VBA source code into a single string.
  329 +
  330 +Example:
  331 +
  332 + :::python
  333 + print vbaparser.reveal()
  334 +
  335 +
326 ### Analyze VBA Source Code 336 ### Analyze VBA Source Code
327 337
328 Since version 0.40, the VBA_Parser class provides simpler methods than VBA_Scanner to analyze all macros contained 338 Since version 0.40, the VBA_Parser class provides simpler methods than VBA_Scanner to analyze all macros contained
@@ -498,4 +508,4 @@ python-oletools documentation @@ -498,4 +508,4 @@ python-oletools documentation
498 - [[oletimes]] 508 - [[oletimes]]
499 - [[olevba]] 509 - [[olevba]]
500 - [[pyxswf]] 510 - [[pyxswf]]
501 - - [[rtfobj]]  
502 \ No newline at end of file 511 \ No newline at end of file
  512 + - [[rtfobj]]
oletools/olevba.py
@@ -2187,7 +2187,23 @@ class VBA_Parser(object): @@ -2187,7 +2187,23 @@ class VBA_Parser(object):
2187 return self.analysis_results 2187 return self.analysis_results
2188 2188
2189 2189
2190 - 2190 + def reveal(self):
  2191 + # we only want printable strings:
  2192 + analysis = self.analyze_macros(show_decoded_strings=False)
  2193 + # to avoid replacing short strings contained into longer strings, we sort the analysis results
  2194 + # based on the length of the encoded string, in reverse order:
  2195 + analysis = sorted(analysis, key=lambda type_decoded_encoded: len(type_decoded_encoded[2]), reverse=True)
  2196 + # normally now self.vba_code_all_modules contains source code from all modules
  2197 + deobf_code = self.vba_code_all_modules
  2198 + for kw_type, decoded, encoded in analysis:
  2199 + if kw_type == 'VBA string':
  2200 + #print '%3d occurences: %r => %r' % (deobf_code.count(encoded), encoded, decoded)
  2201 + # need to add double quotes around the decoded strings
  2202 + # after escaping double-quotes as double-double-quotes for VBA:
  2203 + decoded = decoded.replace('"', '""')
  2204 + deobf_code = deobf_code.replace(encoded, '"%s"' % decoded)
  2205 + return deobf_code
  2206 + #TODO: repasser l'analyse plusieurs fois si des chaines hex ou base64 sont revelees
2191 2207
2192 2208
2193 def close(self): 2209 def close(self):
@@ -2263,28 +2279,6 @@ class VBA_Parser_CLI(VBA_Parser): @@ -2263,28 +2279,6 @@ class VBA_Parser_CLI(VBA_Parser):
2263 print 'No suspicious keyword or IOC found.' 2279 print 'No suspicious keyword or IOC found.'
2264 2280
2265 2281
2266 - def reveal(self):  
2267 - #TODO: move this code to the VBA_Parser class (without print)  
2268 - print 'MACRO SOURCE CODE WITH DEOBFUSCATED VBA STRINGS (EXPERIMENTAL):\n'  
2269 - # we only want printable strings:  
2270 - analysis = self.analyze_macros(show_decoded_strings=False)  
2271 - # to avoid replacing short strings contained into longer strings, we sort the analysis results  
2272 - # based on the length of the encoded string, in reverse order:  
2273 - analysis = sorted(analysis, key=lambda type_decoded_encoded: len(type_decoded_encoded[2]), reverse=True)  
2274 - # normally now self.vba_code_all_modules contains source code from all modules  
2275 - deobf_code = self.vba_code_all_modules  
2276 - for kw_type, decoded, encoded in analysis:  
2277 - if kw_type == 'VBA string':  
2278 - #print '%3d occurences: %r => %r' % (deobf_code.count(encoded), encoded, decoded)  
2279 - # need to add double quotes around the decoded strings  
2280 - # after escaping double-quotes as double-double-quotes for VBA:  
2281 - decoded = decoded.replace('"', '""')  
2282 - deobf_code = deobf_code.replace(encoded, '"%s"' % decoded)  
2283 - print ''  
2284 - print deobf_code  
2285 - #TODO: repasser l'analyse plusieurs fois si des chaines hex ou base64 sont revelees  
2286 -  
2287 -  
2288 def process_file(self, show_decoded_strings=False, 2282 def process_file(self, show_decoded_strings=False,
2289 display_code=True, global_analysis=True, hide_attributes=True, 2283 display_code=True, global_analysis=True, hide_attributes=True,
2290 vba_code_only=False, show_deobfuscated_code=False): 2284 vba_code_only=False, show_deobfuscated_code=False):
@@ -2341,7 +2335,8 @@ class VBA_Parser_CLI(VBA_Parser): @@ -2341,7 +2335,8 @@ class VBA_Parser_CLI(VBA_Parser):
2341 # analyse the code from all modules at once: 2335 # analyse the code from all modules at once:
2342 self.print_analysis(show_decoded_strings) 2336 self.print_analysis(show_decoded_strings)
2343 if show_deobfuscated_code: 2337 if show_deobfuscated_code:
2344 - self.reveal() 2338 + print 'MACRO SOURCE CODE WITH DEOBFUSCATED VBA STRINGS (EXPERIMENTAL):\n\n'
  2339 + print self.reveal()
2345 else: 2340 else:
2346 print 'No VBA macros found.' 2341 print 'No VBA macros found.'
2347 except: #TypeError: 2342 except: #TypeError:
@@ -2548,4 +2543,4 @@ def main(): @@ -2548,4 +2543,4 @@ def main():
2548 if __name__ == '__main__': 2543 if __name__ == '__main__':
2549 main() 2544 main()
2550 2545
2551 -# This was coded while listening to "Dust" from I Love You But I've Chosen Darkness  
2552 \ No newline at end of file 2546 \ No newline at end of file
  2547 +# This was coded while listening to "Dust" from I Love You But I've Chosen Darkness