Commit e514acb80923c680dbfc5404516c959ffcff9cbb
1 parent
04f4b8c5
msodde: ensure file handles are closed after rtf-handling
Showing
1 changed file
with
11 additions
and
5 deletions
oletools/msodde.py
| ... | ... | @@ -58,6 +58,7 @@ from __future__ import print_function |
| 58 | 58 | # 2017-11-24 CH: - added support for xls files |
| 59 | 59 | # 2017-11-29 CH: - added support for xlsb files |
| 60 | 60 | # 2017-11-29 PL: - added support for RTF files (issue #223) |
| 61 | +# 2017-12-07 CH: - ensure rtf file is closed | |
| 61 | 62 | |
| 62 | 63 | __version__ = '0.52dev9' |
| 63 | 64 | |
| ... | ... | @@ -68,6 +69,7 @@ __version__ = '0.52dev9' |
| 68 | 69 | # TODO: Test with more interesting (real-world?) samples: xls, xlsx, xlsb, docx |
| 69 | 70 | # TODO: Think about finding all external "connections" of documents, not just |
| 70 | 71 | # DDE-Links |
| 72 | +# TODO: avoid reading complete rtf file data into memory | |
| 71 | 73 | |
| 72 | 74 | #------------------------------------------------------------------------------ |
| 73 | 75 | # REFERENCES: |
| ... | ... | @@ -780,11 +782,13 @@ class RtfFieldParser(rtfobj.RtfParser): |
| 780 | 782 | self.current_destination.data += matchobject.group()[1] |
| 781 | 783 | |
| 782 | 784 | |
| 785 | +RTF_START = b'\x7b\x5c\x72\x74' # == b'{\\rt', written as hex escapes so it does not mess up auto-indent | |
| 783 | 786 | |
| 784 | -def process_rtf(filepath, field_filter_mode=None): | |
| 787 | +def process_rtf(file_handle, field_filter_mode=None): | |
| 785 | 788 | log.debug('process_rtf') |
| 786 | 789 | all_fields = [] |
| 787 | - data = open(filepath, 'rb').read() | |
| 790 | + data = RTF_START + file_handle.read() # read complete file into memory! | |
| 791 | + file_handle.close() | |
| 788 | 792 | rtfparser = RtfFieldParser(data) |
| 789 | 793 | rtfparser.parse() |
| 790 | 794 | all_fields = rtfparser.fields |
| ... | ... | @@ -814,9 +818,11 @@ def process_file(filepath, field_filter_mode=None): |
| 814 | 818 | return process_xls(filepath) |
| 815 | 819 | else: |
| 816 | 820 | return process_doc(filepath) |
| 817 | - elif open(filepath, 'rb').read(4) == b'{\\rt': | |
| 818 | - # This is a RTF file | |
| 819 | - return process_rtf(filepath, field_filter_mode) | |
| 821 | + else: | |
| 822 | + with open(filepath, 'rb') as file_handle: | |
| 823 | + if file_handle.read(4) == RTF_START: | |
| 824 | + # This is an RTF file | |
| 825 | + return process_rtf(file_handle, field_filter_mode) | |
| 820 | 826 | try: |
| 821 | 827 | doctype = ooxml.get_type(filepath) |
| 822 | 828 | log.debug('Detected file type: {0}'.format(doctype)) | ... | ... |