Commit e514acb80923c680dbfc5404516c959ffcff9cbb

Authored by Christian Herdtweck
1 parent 04f4b8c5

msodde: ensure file handles are closed after rtf-handling

Showing 1 changed file with 11 additions and 5 deletions
oletools/msodde.py
... ... @@ -58,6 +58,7 @@ from __future__ import print_function
58 58 # 2017-11-24 CH: - added support for xls files
59 59 # 2017-11-29 CH: - added support for xlsb files
60 60 # 2017-11-29 PL: - added support for RTF files (issue #223)
  61 +# 2017-12-07 CH: - ensure rtf file is closed
61 62  
62 63 __version__ = '0.52dev9'
63 64  
... ... @@ -68,6 +69,7 @@ __version__ = '0.52dev9'
68 69 # TODO: Test with more interesting (real-world?) samples: xls, xlsx, xlsb, docx
69 70 # TODO: Think about finding all external "connections" of documents, not just
70 71 # DDE-Links
  72 +# TODO: avoid reading complete rtf file data into memory
71 73  
72 74 #------------------------------------------------------------------------------
73 75 # REFERENCES:
... ... @@ -780,11 +782,13 @@ class RtfFieldParser(rtfobj.RtfParser):
780 782 self.current_destination.data += matchobject.group()[1]
781 783  
782 784  
  785 +RTF_START = b'\x7b\x5c\x72\x74' # == b'{\\rt' (the 4 bytes "{", "\", "r", "t"); hex-escaped so the brace does not mess up auto-indent
783 786  
784   -def process_rtf(filepath, field_filter_mode=None):
  787 +def process_rtf(file_handle, field_filter_mode=None):
785 788 log.debug('process_rtf')
786 789 all_fields = []
787   - data = open(filepath, 'rb').read()
  790 + data = RTF_START + file_handle.read() # caller already consumed the 4-byte magic, so re-prepend it; reads the complete rest of the file into memory!
  791 + file_handle.close()
788 792 rtfparser = RtfFieldParser(data)
789 793 rtfparser.parse()
790 794 all_fields = rtfparser.fields
... ... @@ -814,9 +818,11 @@ def process_file(filepath, field_filter_mode=None):
814 818 return process_xls(filepath)
815 819 else:
816 820 return process_doc(filepath)
817   - elif open(filepath, 'rb').read(4) == b'{\\rt':
818   - # This is a RTF file
819   - return process_rtf(filepath, field_filter_mode)
  821 + else:
  822 + with open(filepath, 'rb') as file_handle:
  823 + if file_handle.read(4) == RTF_START:
  824 + # This is a RTF file
  825 + return process_rtf(file_handle, field_filter_mode)
820 826 try:
821 827 doctype = ooxml.get_type(filepath)
822 828 log.debug('Detected file type: {0}'.format(doctype))
... ...