Commit d353c6d94dfe08154958db5ee2ef7f53469621e5

Authored by Philippe Lagadec
2 parents 22ab4465 80804e86

Merge pull request #44 from christian-intra2net/fix-deof-with-json

Fix --deobf option for JSON output
oletools/olevba.py
@@ -2771,12 +2771,14 @@ class VBA_Parser_CLI(VBA_Parser): @@ -2771,12 +2771,14 @@ class VBA_Parser_CLI(VBA_Parser):
2771 else: 2771 else:
2772 print 'No suspicious keyword or IOC found.' 2772 print 'No suspicious keyword or IOC found.'
2773 2773
2774 - def print_analysis_json(self, show_decoded_strings=False): 2774 + def print_analysis_json(self, show_decoded_strings=False, deobfuscate=False):
2775 """ 2775 """
2776 Analyze the provided VBA code, and return the results in json format 2776 Analyze the provided VBA code, and return the results in json format
2777 2777
2778 :param vba_code: str, VBA source code to be analyzed 2778 :param vba_code: str, VBA source code to be analyzed
2779 :param show_decoded_strings: bool, if True hex-encoded strings will be displayed with their decoded content. 2779 :param show_decoded_strings: bool, if True hex-encoded strings will be displayed with their decoded content.
  2780 + :param deobfuscate: bool, if True attempt to deobfuscate VBA expressions (slow)
  2781 +
2780 :return: dict 2782 :return: dict
2781 """ 2783 """
2782 # print a waiting message only if the output is not redirected to a file: 2784 # print a waiting message only if the output is not redirected to a file:
@@ -2784,7 +2786,7 @@ class VBA_Parser_CLI(VBA_Parser): @@ -2784,7 +2786,7 @@ class VBA_Parser_CLI(VBA_Parser):
2784 print 'Analysis...\r', 2786 print 'Analysis...\r',
2785 sys.stdout.flush() 2787 sys.stdout.flush()
2786 return [dict(type=kw_type, keyword=keyword, description=description) 2788 return [dict(type=kw_type, keyword=keyword, description=description)
2787 - for kw_type, keyword, description in self.analyze_macros(show_decoded_strings)] 2789 + for kw_type, keyword, description in self.analyze_macros(show_decoded_strings, deobfuscate)]
2788 2790
2789 def process_file(self, show_decoded_strings=False, 2791 def process_file(self, show_decoded_strings=False,
2790 display_code=True, hide_attributes=True, 2792 display_code=True, hide_attributes=True,
@@ -2856,7 +2858,8 @@ class VBA_Parser_CLI(VBA_Parser): @@ -2856,7 +2858,8 @@ class VBA_Parser_CLI(VBA_Parser):
2856 2858
2857 def process_file_json(self, show_decoded_strings=False, 2859 def process_file_json(self, show_decoded_strings=False,
2858 display_code=True, hide_attributes=True, 2860 display_code=True, hide_attributes=True,
2859 - vba_code_only=False, show_deobfuscated_code=False): 2861 + vba_code_only=False, show_deobfuscated_code=False,
  2862 + deobfuscate=False):
2860 """ 2863 """
2861 Process a single file 2864 Process a single file
2862 2865
@@ -2869,6 +2872,7 @@ class VBA_Parser_CLI(VBA_Parser): @@ -2869,6 +2872,7 @@ class VBA_Parser_CLI(VBA_Parser):
2869 :param global_analysis: bool, if True all modules are merged for a single analysis (default), 2872 :param global_analysis: bool, if True all modules are merged for a single analysis (default),
2870 otherwise each module is analyzed separately (old behaviour) 2873 otherwise each module is analyzed separately (old behaviour)
2871 :param hide_attributes: bool, if True the first lines starting with "Attribute VB" are hidden (default) 2874 :param hide_attributes: bool, if True the first lines starting with "Attribute VB" are hidden (default)
  2875 + :param deobfuscate: bool, if True attempt to deobfuscate VBA expressions (slow)
2872 """ 2876 """
2873 #TODO: fix conflicting parameters (?) 2877 #TODO: fix conflicting parameters (?)
2874 2878
@@ -2885,6 +2889,7 @@ class VBA_Parser_CLI(VBA_Parser): @@ -2885,6 +2889,7 @@ class VBA_Parser_CLI(VBA_Parser):
2885 result['json_conversion_successful'] = False 2889 result['json_conversion_successful'] = False
2886 result['analysis'] = None 2890 result['analysis'] = None
2887 result['code_deobfuscated'] = None 2891 result['code_deobfuscated'] = None
  2892 + result['do_deobfuscate'] = deobfuscate
2888 2893
2889 try: 2894 try:
2890 #TODO: handle olefile errors, when an OLE file is malformed 2895 #TODO: handle olefile errors, when an OLE file is malformed
@@ -2904,10 +2909,13 @@ class VBA_Parser_CLI(VBA_Parser): @@ -2904,10 +2909,13 @@ class VBA_Parser_CLI(VBA_Parser):
2904 curr_macro['ole_stream'] = stream_path 2909 curr_macro['ole_stream'] = stream_path
2905 if display_code: 2910 if display_code:
2906 curr_macro['code'] = vba_code_filtered.strip() 2911 curr_macro['code'] = vba_code_filtered.strip()
  2912 + else:
  2913 + curr_macro['code'] = None
2907 macros.append(curr_macro) 2914 macros.append(curr_macro)
2908 if not vba_code_only: 2915 if not vba_code_only:
2909 # analyse the code from all modules at once: 2916 # analyse the code from all modules at once:
2910 - result['analysis'] = self.print_analysis_json(show_decoded_strings) 2917 + result['analysis'] = self.print_analysis_json(show_decoded_strings,
  2918 + deobfuscate)
2911 if show_deobfuscated_code: 2919 if show_deobfuscated_code:
2912 result['code_deobfuscated'] = self.reveal() 2920 result['code_deobfuscated'] = self.reveal()
2913 result['macros'] = macros 2921 result['macros'] = macros
@@ -3063,7 +3071,10 @@ def main(): @@ -3063,7 +3071,10 @@ def main():
3063 3071
3064 # with the option --reveal, make sure --deobf is also enabled: 3072 # with the option --reveal, make sure --deobf is also enabled:
3065 if options.show_deobfuscated_code and not options.deobfuscate: 3073 if options.show_deobfuscated_code and not options.deobfuscate:
  3074 + log.info('set --deobf because --reveal was set')
3066 options.deobfuscate = True 3075 options.deobfuscate = True
  3076 + if options.output_mode == 'triage' and options.show_deobfuscated_code:
  3077 + log.info('ignoring option --reveal in triage output mode')
3067 3078
3068 # Column headers (do not know how many files there will be yet, so if no output_mode 3079 # Column headers (do not know how many files there will be yet, so if no output_mode
3069 # was specified, we will print triage for first file --> need these headers) 3080 # was specified, we will print triage for first file --> need these headers)
@@ -3130,7 +3141,8 @@ def main(): @@ -3130,7 +3141,8 @@ def main():
3130 vba_parser.process_file_json(show_decoded_strings=options.show_decoded_strings, 3141 vba_parser.process_file_json(show_decoded_strings=options.show_decoded_strings,
3131 display_code=options.display_code, 3142 display_code=options.display_code,
3132 hide_attributes=options.hide_attributes, vba_code_only=options.vba_code_only, 3143 hide_attributes=options.hide_attributes, vba_code_only=options.vba_code_only,
3133 - show_deobfuscated_code=options.show_deobfuscated_code)) 3144 + show_deobfuscated_code=options.show_deobfuscated_code,
  3145 + deobfuscate=options.deobfuscate))
3134 else: # (should be impossible) 3146 else: # (should be impossible)
3135 raise ValueError('unexpected output mode: "{0}"!'.format(options.output_mode)) 3147 raise ValueError('unexpected output mode: "{0}"!'.format(options.output_mode))
3136 count += 1 3148 count += 1
oletools/ppt_parser.py
@@ -1568,46 +1568,6 @@ def iterative_decompress(stream, size, chunk_size=4096): @@ -1568,46 +1568,6 @@ def iterative_decompress(stream, size, chunk_size=4096):
1568 1568
1569 return decomp, n_read, return_err 1569 return decomp, n_read, return_err
1570 1570
1571 -# === TESTING =================================================================  
1572 -  
1573 -def test():  
1574 - """ for testing and debugging """  
1575 -  
1576 - from glob import glob  
1577 - from olevba import VBA_Parser  
1578 -  
1579 - # setup logging  
1580 - logging.basicConfig(level=logging.DEBUG,  
1581 - format='%(levelname)-8s %(name)s: %(message)s')  
1582 - log.setLevel(logging.NOTSET)  
1583 -  
1584 - test_files = ['gelaber_autostart.ppt', ]  
1585 - #test_files = glob('*.ppt')  
1586 - for file_name in test_files:  
1587 - # parse  
1588 - log.info('-' * 72)  
1589 - log.info('test file: {}'.format(file_name))  
1590 - try:  
1591 - ppt = PptParser(file_name, fast_fail=False)  
1592 - #ppt.parse_document_persist_object()  
1593 -  
1594 - for vba_data in ppt.iter_vba_data():  
1595 - parser = VBA_Parser(None, vba_data, container='PptParser')  
1596 - for vba_root, project_path, dir_path in \  
1597 - parser.find_vba_projects():  
1598 - log.info('found vba project: root={}, proj={}, dir={}'  
1599 - .format(vba_root, project_path, dir_path))  
1600 - for subfilename, stream_path, vba_filename, vba_code in \  
1601 - parser.extract_all_macros():  
1602 - log.info('found macro: subfile={}, stream={}, vbafile={}'  
1603 - .format(subfilename, stream_path, vba_filename))  
1604 - for line in vba_code.splitlines():  
1605 - log.info('code: {}'.format(line.rstrip()))  
1606 -  
1607 -  
1608 - except Exception:  
1609 - log.exception('exception')  
1610 -  
1611 1571
1612 if __name__ == '__main__': 1572 if __name__ == '__main__':
1613 - test() 1573 + print 'nothing here to run!'