Commit 7f6a924d5bf7116ad1b860c8b66160d55bfb7097
olevba: merge changes to reveal API
Showing
2 changed files
with
31 additions
and
26 deletions
oletools/doc/olevba.md
| @@ -323,6 +323,16 @@ Example: | @@ -323,6 +323,16 @@ Example: | ||
| 323 | 323 | ||
| 324 | Alternatively, the VBA_Parser method **extract_all_macros** returns the same results as a list of tuples. | 324 | Alternatively, the VBA_Parser method **extract_all_macros** returns the same results as a list of tuples. |
| 325 | 325 | ||
| 326 | +### Extract Experimental Deobfuscated VBA Macro Source Code | ||
| 327 | + | ||
| 328 | +The method **reveal** extracts, decompresses, and deobfuscates VBA source code into a single string. | ||
| 329 | + | ||
| 330 | +Example: | ||
| 331 | + | ||
| 332 | + :::python | ||
| 333 | + print vbaparser.reveal() | ||
| 334 | + | ||
| 335 | + | ||
| 326 | ### Analyze VBA Source Code | 336 | ### Analyze VBA Source Code |
| 327 | 337 | ||
| 328 | Since version 0.40, the VBA_Parser class provides simpler methods than VBA_Scanner to analyze all macros contained | 338 | Since version 0.40, the VBA_Parser class provides simpler methods than VBA_Scanner to analyze all macros contained |
| @@ -498,4 +508,4 @@ python-oletools documentation | @@ -498,4 +508,4 @@ python-oletools documentation | ||
| 498 | - [[oletimes]] | 508 | - [[oletimes]] |
| 499 | - [[olevba]] | 509 | - [[olevba]] |
| 500 | - [[pyxswf]] | 510 | - [[pyxswf]] |
| 501 | - - [[rtfobj]] | ||
| 502 | \ No newline at end of file | 511 | \ No newline at end of file |
| 512 | + - [[rtfobj]] |
oletools/olevba.py
| @@ -2187,7 +2187,23 @@ class VBA_Parser(object): | @@ -2187,7 +2187,23 @@ class VBA_Parser(object): | ||
| 2187 | return self.analysis_results | 2187 | return self.analysis_results |
| 2188 | 2188 | ||
| 2189 | 2189 | ||
| 2190 | - | 2190 | + def reveal(self): |
| 2191 | + # we only want printable strings: | ||
| 2192 | + analysis = self.analyze_macros(show_decoded_strings=False) | ||
| 2193 | + # to avoid replacing short strings contained into longer strings, we sort the analysis results | ||
| 2194 | + # based on the length of the encoded string, in reverse order: | ||
| 2195 | + analysis = sorted(analysis, key=lambda type_decoded_encoded: len(type_decoded_encoded[2]), reverse=True) | ||
| 2196 | + # normally now self.vba_code_all_modules contains source code from all modules | ||
| 2197 | + deobf_code = self.vba_code_all_modules | ||
| 2198 | + for kw_type, decoded, encoded in analysis: | ||
| 2199 | + if kw_type == 'VBA string': | ||
| 2200 | + #print '%3d occurences: %r => %r' % (deobf_code.count(encoded), encoded, decoded) | ||
| 2201 | + # need to add double quotes around the decoded strings | ||
| 2202 | + # after escaping double-quotes as double-double-quotes for VBA: | ||
| 2203 | + decoded = decoded.replace('"', '""') | ||
| 2204 | + deobf_code = deobf_code.replace(encoded, '"%s"' % decoded) | ||
| 2205 | + return deobf_code | ||
| 2206 | + #TODO: repasser l'analyse plusieurs fois si des chaines hex ou base64 sont revelees | ||
| 2191 | 2207 | ||
| 2192 | 2208 | ||
| 2193 | def close(self): | 2209 | def close(self): |
| @@ -2263,28 +2279,6 @@ class VBA_Parser_CLI(VBA_Parser): | @@ -2263,28 +2279,6 @@ class VBA_Parser_CLI(VBA_Parser): | ||
| 2263 | print 'No suspicious keyword or IOC found.' | 2279 | print 'No suspicious keyword or IOC found.' |
| 2264 | 2280 | ||
| 2265 | 2281 | ||
| 2266 | - def reveal(self): | ||
| 2267 | - #TODO: move this code to the VBA_Parser class (without print) | ||
| 2268 | - print 'MACRO SOURCE CODE WITH DEOBFUSCATED VBA STRINGS (EXPERIMENTAL):\n' | ||
| 2269 | - # we only want printable strings: | ||
| 2270 | - analysis = self.analyze_macros(show_decoded_strings=False) | ||
| 2271 | - # to avoid replacing short strings contained into longer strings, we sort the analysis results | ||
| 2272 | - # based on the length of the encoded string, in reverse order: | ||
| 2273 | - analysis = sorted(analysis, key=lambda type_decoded_encoded: len(type_decoded_encoded[2]), reverse=True) | ||
| 2274 | - # normally now self.vba_code_all_modules contains source code from all modules | ||
| 2275 | - deobf_code = self.vba_code_all_modules | ||
| 2276 | - for kw_type, decoded, encoded in analysis: | ||
| 2277 | - if kw_type == 'VBA string': | ||
| 2278 | - #print '%3d occurences: %r => %r' % (deobf_code.count(encoded), encoded, decoded) | ||
| 2279 | - # need to add double quotes around the decoded strings | ||
| 2280 | - # after escaping double-quotes as double-double-quotes for VBA: | ||
| 2281 | - decoded = decoded.replace('"', '""') | ||
| 2282 | - deobf_code = deobf_code.replace(encoded, '"%s"' % decoded) | ||
| 2283 | - print '' | ||
| 2284 | - print deobf_code | ||
| 2285 | - #TODO: repasser l'analyse plusieurs fois si des chaines hex ou base64 sont revelees | ||
| 2286 | - | ||
| 2287 | - | ||
| 2288 | def process_file(self, show_decoded_strings=False, | 2282 | def process_file(self, show_decoded_strings=False, |
| 2289 | display_code=True, global_analysis=True, hide_attributes=True, | 2283 | display_code=True, global_analysis=True, hide_attributes=True, |
| 2290 | vba_code_only=False, show_deobfuscated_code=False): | 2284 | vba_code_only=False, show_deobfuscated_code=False): |
| @@ -2341,7 +2335,8 @@ class VBA_Parser_CLI(VBA_Parser): | @@ -2341,7 +2335,8 @@ class VBA_Parser_CLI(VBA_Parser): | ||
| 2341 | # analyse the code from all modules at once: | 2335 | # analyse the code from all modules at once: |
| 2342 | self.print_analysis(show_decoded_strings) | 2336 | self.print_analysis(show_decoded_strings) |
| 2343 | if show_deobfuscated_code: | 2337 | if show_deobfuscated_code: |
| 2344 | - self.reveal() | 2338 | + print 'MACRO SOURCE CODE WITH DEOBFUSCATED VBA STRINGS (EXPERIMENTAL):\n\n' |
| 2339 | + print self.reveal() | ||
| 2345 | else: | 2340 | else: |
| 2346 | print 'No VBA macros found.' | 2341 | print 'No VBA macros found.' |
| 2347 | except: #TypeError: | 2342 | except: #TypeError: |
| @@ -2548,4 +2543,4 @@ def main(): | @@ -2548,4 +2543,4 @@ def main(): | ||
| 2548 | if __name__ == '__main__': | 2543 | if __name__ == '__main__': |
| 2549 | main() | 2544 | main() |
| 2550 | 2545 | ||
| 2551 | -# This was coded while listening to "Dust" from I Love You But I've Chosen Darkness | ||
| 2552 | \ No newline at end of file | 2546 | \ No newline at end of file |
| 2547 | +# This was coded while listening to "Dust" from I Love You But I've Chosen Darkness |