def reveal(self):
    """
    Build the source code of all VBA modules as one string in which every
    obfuscated string expression reported by analyze_macros() is replaced
    by its decoded value, written as a VBA string literal. (EXPERIMENTAL)

    :return: str, the macro source code with deobfuscated VBA strings
    """
    def encoded_length(result):
        # each analysis result is a tuple (type, decoded, encoded)
        return len(result[2])

    # only printable decoded strings are wanted here:
    results = list(self.analyze_macros(show_decoded_strings=False))
    # Substitute the longest encoded expressions first, so that a short
    # expression contained in a longer one does not break the longer match:
    results.sort(key=encoded_length, reverse=True)
    # at this point self.vba_code_all_modules normally holds the source
    # code from all modules (it is read only after the analysis has run):
    source = self.vba_code_all_modules
    for result in results:
        kw_type, decoded, encoded = result
        if kw_type != 'VBA string':
            continue
        # VBA escapes a double quote inside a string literal by doubling it;
        # the decoded text is then wrapped in double quotes:
        escaped = decoded.replace('"', '""')
        source = source.replace(encoded, '"%s"' % escaped)
    # TODO: run the analysis several more times if hex or base64 strings
    #       are revealed by the substitution
    return source
except: #TypeError: @@ -2548,4 +2543,4 @@ def main(): if __name__ == '__main__': main() -# This was coded while listening to "Dust" from I Love You But I've Chosen Darkness \ No newline at end of file +# This was coded while listening to "Dust" from I Love You But I've Chosen Darkness