Commit 579c1386f3010ddd6eb7004af9428f083f2bcc45
1 parent
29276cd0
rtfobj: added support for RTF files (issue #223)
Showing
2 changed files
with
63 additions
and
2 deletions
oletools/msodde.py
| @@ -57,8 +57,9 @@ from __future__ import print_function | @@ -57,8 +57,9 @@ from __future__ import print_function | ||
| 57 | # 2017-11-23 CH: - added support for xlsx files | 57 | # 2017-11-23 CH: - added support for xlsx files |
| 58 | # 2017-11-24 CH: - added support for xls files | 58 | # 2017-11-24 CH: - added support for xls files |
| 59 | # 2017-11-29 CH: - added support for xlsb files | 59 | # 2017-11-29 CH: - added support for xlsb files |
| 60 | +# 2017-11-29 PL: - added support for RTF files (issue #223) | ||
| 60 | 61 | ||
| 61 | -__version__ = '0.52dev8' | 62 | +__version__ = '0.52dev9' |
| 62 | 63 | ||
| 63 | #------------------------------------------------------------------------------ | 64 | #------------------------------------------------------------------------------ |
| 64 | # TODO: field codes can be in headers/footers/comments - parse these | 65 | # TODO: field codes can be in headers/footers/comments - parse these |
| @@ -100,6 +101,7 @@ if not _parent_dir in sys.path: | @@ -100,6 +101,7 @@ if not _parent_dir in sys.path: | ||
| 100 | from oletools.thirdparty import olefile | 101 | from oletools.thirdparty import olefile |
| 101 | from oletools import ooxml | 102 | from oletools import ooxml |
| 102 | from oletools import xls_parser | 103 | from oletools import xls_parser |
| 104 | +from oletools import rtfobj | ||
| 103 | 105 | ||
| 104 | # === PYTHON 2+3 SUPPORT ====================================================== | 106 | # === PYTHON 2+3 SUPPORT ====================================================== |
| 105 | 107 | ||
| @@ -748,6 +750,62 @@ def process_xlsx(filepath, filed_filter_mode=None): | @@ -748,6 +750,62 @@ def process_xlsx(filepath, filed_filter_mode=None): | ||
| 748 | return u'\n'.join(dde_links) | 750 | return u'\n'.join(dde_links) |
| 749 | 751 | ||
| 750 | 752 | ||
class RtfFieldParser(rtfobj.RtfParser):
    """
    Specialized RTF parser to extract fields such as DDEAUTO

    The content of every ``fldinst`` destination that gets closed during
    parsing is appended to :attr:`fields`, after CR/LF characters have been
    removed and surrounding whitespace has been stripped.
    """

    def __init__(self, data):
        """
        :param data: raw content of the RTF file, passed unchanged to
            rtfobj.RtfParser (process_rtf reads it in binary mode)
        """
        super(RtfFieldParser, self).__init__(data)
        # list of cleaned-up field instructions found, one per fldinst
        self.fields = []

    def open_destination(self, destination):
        """
        Log the start of a field instruction destination.

        NOTE(review): presumably invoked by rtfobj.RtfParser whenever a
        destination is opened - confirm against rtfobj.

        :param destination: destination object being opened; only its
            ``cword`` and ``start`` attributes are read here
        """
        if destination.cword == b'fldinst':
            log.debug('*** Start field data at index %Xh' % destination.start)

    def close_destination(self, destination):
        """
        Record the text of a field instruction destination when it closes.

        :param destination: destination object being closed; its ``data``
            attribute holds the text accumulated for that destination
        """
        if destination.cword == b'fldinst':
            log.debug('*** Close field data at index %Xh' % self.index)
            log.debug('Field text: %r' % destination.data)
            # remove extra spaces and newline chars:
            field_clean = destination.data.translate(None, b'\r\n').strip()
            log.debug('Cleaned Field text: %r' % field_clean)
            self.fields.append(field_clean)

    def control_symbol(self, matchobject):
        """
        Append the character of a control symbol to the current destination.

        :param matchobject: regex match covering the control symbol, i.e. a
            backslash followed by one character
        """
        # required to handle control symbols such as '\\'
        # inject the symbol as-is in the text:
        # TODO: handle special symbols properly
        # Slice instead of indexing: on Python 3, indexing a bytes object
        # returns an int, which cannot be concatenated to the destination
        # data. The slice yields a 1-char string on both Python 2 and 3.
        self.current_destination.data += matchobject.group()[1:2]
| 781 | + | ||
| 782 | + | ||
| 783 | + | ||
def process_rtf(filepath, field_filter_mode=None):
    """
    Extract the field instructions (e.g. DDE/DDEAUTO) from an RTF file.

    :param filepath: path of the RTF file to analyze
    :param field_filter_mode: FIELD_FILTER_ALL or None to return every
        field, FIELD_FILTER_DDE to keep only fields matching
        FIELD_DDE_REGEX, FIELD_FILTER_BLACKLIST to drop the fields for
        which field_is_blacklisted() is true
    :returns: unicode text with one field per line
    :raises ValueError: if field_filter_mode is none of the above
    """
    log.debug('process_rtf')
    # use a context manager so the file handle is closed deterministically
    with open(filepath, 'rb') as rtf_file:
        data = rtf_file.read()
    rtfparser = RtfFieldParser(data)
    rtfparser.parse()
    # The parser collects byte strings; convert them to text once, here, so
    # that the result can be joined with a unicode separator on Python 3 as
    # well. latin-1 maps every byte value to a character, so unexpected
    # 8-bit bytes in a field cannot abort the analysis.
    all_fields = [field.decode('latin-1') for field in rtfparser.fields]
    # apply field command filter
    log.debug('filtering with mode "{0}"'.format(field_filter_mode))
    if field_filter_mode in (FIELD_FILTER_ALL, None):
        clean_fields = all_fields
    elif field_filter_mode == FIELD_FILTER_DDE:
        clean_fields = [field for field in all_fields
                        if FIELD_DDE_REGEX.match(field)]
    elif field_filter_mode == FIELD_FILTER_BLACKLIST:
        # check if fields are acceptable and should not be returned
        clean_fields = [field for field in all_fields
                        if not field_is_blacklisted(field.strip())]
    else:
        raise ValueError('Unexpected field_filter_mode: "{0}"'
                         .format(field_filter_mode))

    return u'\n'.join(clean_fields)
| 807 | + | ||
| 808 | + | ||
| 751 | def process_file(filepath, field_filter_mode=None): | 809 | def process_file(filepath, field_filter_mode=None): |
| 752 | """ decides which of process_doc/x or process_xls/x to call """ | 810 | """ decides which of process_doc/x or process_xls/x to call """ |
| 753 | if olefile.isOleFile(filepath): | 811 | if olefile.isOleFile(filepath): |
| @@ -756,6 +814,9 @@ def process_file(filepath, field_filter_mode=None): | @@ -756,6 +814,9 @@ def process_file(filepath, field_filter_mode=None): | ||
| 756 | return process_xls(filepath) | 814 | return process_xls(filepath) |
| 757 | else: | 815 | else: |
| 758 | return process_doc(filepath) | 816 | return process_doc(filepath) |
| 817 | + elif open(filepath, 'rb').read(4) == b'{\\rt': | ||
| 818 | + # This is an RTF file | ||
| 819 | + return process_rtf(filepath, field_filter_mode) | ||
| 759 | try: | 820 | try: |
| 760 | doctype = ooxml.get_type(filepath) | 821 | doctype = ooxml.get_type(filepath) |
| 761 | log.debug('Detected file type: {0}'.format(doctype)) | 822 | log.debug('Detected file type: {0}'.format(doctype)) |
setup.py
| @@ -42,7 +42,7 @@ import os, fnmatch | @@ -42,7 +42,7 @@ import os, fnmatch | ||
| 42 | #--- METADATA ----------------------------------------------------------------- | 42 | #--- METADATA ----------------------------------------------------------------- |
| 43 | 43 | ||
| 44 | name = "oletools" | 44 | name = "oletools" |
| 45 | -version = '0.52dev5' | 45 | +version = '0.52dev9' |
| 46 | desc = "Python tools to analyze security characteristics of MS Office and OLE files (also called Structured Storage, Compound File Binary Format or Compound Document File Format), for Malware Analysis and Incident Response #DFIR" | 46 | desc = "Python tools to analyze security characteristics of MS Office and OLE files (also called Structured Storage, Compound File Binary Format or Compound Document File Format), for Malware Analysis and Incident Response #DFIR" |
| 47 | long_desc = open('oletools/README.rst').read() | 47 | long_desc = open('oletools/README.rst').read() |
| 48 | author = "Philippe Lagadec" | 48 | author = "Philippe Lagadec" |