Commit 986f132ece8dc95146d866fdddd1e429a706d4ad

Authored by decalage2
1 parent fd60886f

rtfobj: added option -s to save objects to files

Showing 1 changed file with 70 additions and 37 deletions
oletools/rtfobj.py
... ... @@ -61,6 +61,7 @@ http://www.decalage.info/python/oletools
61 61 # - backward-compatible API rtf_iter_objects (fixed issue #70)
62 62 # 2016-07-31 PL: - table output with tablestream
63 63 # 2016-08-01 PL: - detect executable filenames in OLE Package
  64 +# 2016-08-08 PL: - added option -s to save objects to files
64 65  
65 66 __version__ = '0.50'
66 67  
... ... @@ -68,6 +69,10 @@ __version__ = '0.50'
68 69 # TODO:
69 70 # - allow semicolon within hex, as found in this sample:
70 71 # http://contagiodump.blogspot.nl/2011/10/sep-28-cve-2010-3333-manuscript-with.html
  72 +# TODO: use OleObject and OleNativeStream in RtfObject instead of copying each attribute
  73 +# TODO: option -e <id> to extract an object, -e all for all objects
  74 +# TODO: option to choose which destinations to include (objdata by default)
  75 +# TODO: option to display SHA256 or MD5 hashes of objects in table
71 76  
72 77  
73 78 # === IMPORTS =================================================================
... ... @@ -551,7 +556,7 @@ class RtfObjParser(RtfParser):
551 556 rtfobj.is_package = True
552 557 except:
553 558 pass
554   - log.exception('*** Not an OLE 1.0 Object')
  559 + log.debug('*** Not an OLE 1.0 Object')
555 560  
556 561 def bin(self, bindata):
557 562 if self.current_destination.cword == 'objdata':
... ... @@ -609,7 +614,7 @@ def sanitize_filename(filename, replacement='_', max_length=200):
609 614 return sane_fname
610 615  
611 616  
612   -def process_file(container, filename, data, output_dir=None):
  617 +def process_file(container, filename, data, output_dir=None, save_object=False):
613 618 if output_dir:
614 619 if not os.path.isdir(output_dir):
615 620 log.info('creating output directory %s' % output_dir)
... ... @@ -635,47 +640,15 @@ def process_file(container, filename, data, output_dir=None):
635 640 rtfp = RtfObjParser(data)
636 641 rtfp.parse()
637 642 for rtfobj in rtfp.objects:
638   - # print('-'*79)
639   - # print('found object size %d at index %08X - end %08X'
640   - # % (len(rtfobj.rawdata), rtfobj.start, rtfobj.end))
641   - # fname = '%s_object_%08X.raw' % (fname_prefix, rtfobj.start)
642   - # print('saving object to file %s' % fname)
643   - # open(fname, 'wb').write(rtfobj.rawdata)
644 643 pkg_color = None
645 644 if rtfobj.is_ole:
646 645 ole_column = 'format_id: %d\n' % rtfobj.format_id
647 646 ole_column += 'class name: %r\n' % rtfobj.class_name
648 647 ole_column += 'data size: %d' % rtfobj.oledata_size
649   - # print('extract file embedded in OLE object:')
650   - # print('format_id = %d' % rtfobj.format_id)
651   - # print('class name = %r' % rtfobj.class_name)
652   - # print('data size = %d' % rtfobj.oledata_size)
653   - # set a file extension according to the class name:
654   - # class_name = rtfobj.class_name.lower()
655   - # if class_name.startswith(b'word'):
656   - # ext = 'doc'
657   - # elif class_name.startswith(b'package'):
658   - # ext = 'package'
659   - # else:
660   - # ext = 'bin'
661   - # fname = '%s_object_%08X.%s' % (fname_prefix, rtfobj.start, ext)
662   - # print('saving to file %s' % fname)
663   - # open(fname, 'wb').write(rtfobj.oledata)
664 648 if rtfobj.is_package:
665 649 pkg_column = 'Filename: %r\n' % rtfobj.filename
666 650 pkg_column += 'Source path: %r\n' % rtfobj.src_path
667 651 pkg_column += 'Temp path = %r' % rtfobj.temp_path
668   - # print('Parsing OLE Package')
669   - # print('Filename = %r' % rtfobj.filename)
670   - # print('Source path = %r' % rtfobj.src_path)
671   - # print('Temp path = %r' % rtfobj.temp_path)
672   - # if rtfobj.filename:
673   - # fname = '%s_%s' % (fname_prefix,
674   - # sanitize_filename(rtfobj.filename))
675   - # else:
676   - # fname = '%s_object_%08X.noname' % (fname_prefix, rtfobj.start)
677   - # print('saving to file %s' % fname)
678   - # open(fname, 'wb').write(rtfobj.olepkgdata)
679 652 pkg_color = 'yellow'
680 653 # check if the file extension is executable:
681 654 _, ext = os.path.splitext(rtfobj.filename)
... ... @@ -696,6 +669,52 @@ def process_file(container, filename, data, output_dir=None):
696 669 pkg_column
697 670 ), colors=(None, None, None, pkg_color)
698 671 )
  672 + tstream.write_sep()
  673 + if save_object:
  674 + if save_object == 'all':
  675 + objects = rtfp.objects
  676 + else:
  677 + try:
  678 + i = int(save_object)
  679 + objects = [ rtfp.objects[i] ]
  680 + except:
  681 + log.error('The -s option must be followed by an object index or all, such as "-s 2" or "-s all"')
  682 + return
  683 + for rtfobj in objects:
  684 + i = objects.index(rtfobj)
  685 + if rtfobj.is_package:
  686 + print('Saving file from OLE Package in object #%d:' % i)
  687 + print(' Filename = %r' % rtfobj.filename)
  688 + print(' Source path = %r' % rtfobj.src_path)
  689 + print(' Temp path = %r' % rtfobj.temp_path)
  690 + if rtfobj.filename:
  691 + fname = '%s_%s' % (fname_prefix,
  692 + sanitize_filename(rtfobj.filename))
  693 + else:
  694 + fname = '%s_object_%08X.noname' % (fname_prefix, rtfobj.start)
  695 + print(' saving to file %s' % fname)
  696 + open(fname, 'wb').write(rtfobj.olepkgdata)
  697 + elif rtfobj.is_ole:
  698 + print('Saving file embedded in OLE object #%d:' % i)
  699 + print(' format_id = %d' % rtfobj.format_id)
  700 + print(' class name = %r' % rtfobj.class_name)
  701 + print(' data size = %d' % rtfobj.oledata_size)
  702 + # set a file extension according to the class name:
  703 + class_name = rtfobj.class_name.lower()
  704 + if class_name.startswith(b'word'):
  705 + ext = 'doc'
  706 + elif class_name.startswith(b'package'):
  707 + ext = 'package'
  708 + else:
  709 + ext = 'bin'
  710 + fname = '%s_object_%08X.%s' % (fname_prefix, rtfobj.start, ext)
  711 + print(' saving to file %s' % fname)
  712 + open(fname, 'wb').write(rtfobj.oledata)
  713 + else:
  714 + print('Saving raw data in object #%d:' % i)
  715 + fname = '%s_object_%08X.raw' % (fname_prefix, rtfobj.start)
  716 + print(' saving object to file %s' % fname)
  717 + open(fname, 'wb').write(rtfobj.rawdata)
699 718  
700 719  
701 720 #=== MAIN =================================================================
... ... @@ -724,14 +743,27 @@ def main():
724 743 # help='export results to a CSV file')
725 744 parser.add_option("-r", action="store_true", dest="recursive",
726 745 help='find files recursively in subdirectories.')
727   - parser.add_option("-d", type="str", dest="output_dir",
728   - help='use specified directory to output files.', default=None)
729 746 parser.add_option("-z", "--zip", dest='zip_password', type='str', default=None,
730 747 help='if the file is a zip archive, open first file from it, using the provided password (requires Python 2.6+)')
731 748 parser.add_option("-f", "--zipfname", dest='zip_fname', type='str', default='*',
732 749 help='if the file is a zip archive, file(s) to be opened within the zip. Wildcards * and ? are supported. (default:*)')
733 750 parser.add_option('-l', '--loglevel', dest="loglevel", action="store", default=DEFAULT_LOG_LEVEL,
734 751 help="logging level debug/info/warning/error/critical (default=%default)")
  752 + parser.add_option("-s", "--save", dest='save_object', type='str', default=None,
  753 + help='Save the object corresponding to the provided number to a file, for example "-s 2". Use "-s all" to save all objects at once.')
  754 + # parser.add_option("-o", "--outfile", dest='outfile', type='str', default=None,
  755 + # help='Filename to be used when saving an object to a file.')
  756 + parser.add_option("-d", type="str", dest="output_dir",
  757 + help='use specified directory to save output files.', default=None)
  758 + # parser.add_option("--pkg", action="store_true", dest="save_pkg",
  759 + # help='Save OLE Package binary data of extracted objects (file embedded into an OLE Package).')
  760 + # parser.add_option("--ole", action="store_true", dest="save_ole",
  761 + # help='Save OLE binary data of extracted objects (object data without the OLE container).')
  762 + # parser.add_option("--raw", action="store_true", dest="save_raw",
  763 + # help='Save raw binary data of extracted objects (decoded from hex, including the OLE container).')
  764 + # parser.add_option("--hex", action="store_true", dest="save_hex",
  765 + # help='Save raw hexadecimal data of extracted objects (including the OLE container).')
  766 +
735 767  
736 768 (options, args) = parser.parse_args()
737 769  
... ... @@ -755,7 +787,8 @@ def main():
755 787 # ignore directory names stored in zip files:
756 788 if container and filename.endswith('/'):
757 789 continue
758   - process_file(container, filename, data, options.output_dir)
  790 + process_file(container, filename, data, output_dir=options.output_dir,
  791 + save_object=options.save_object)
759 792  
760 793  
761 794 if __name__ == '__main__':
... ...