Commit e514acb80923c680dbfc5404516c959ffcff9cbb

Authored by Christian Herdtweck
1 parent 04f4b8c5

msodde: ensure file handles are closed after rtf-handling

Showing 1 changed file with 11 additions and 5 deletions
oletools/msodde.py
@@ -58,6 +58,7 @@ from __future__ import print_function @@ -58,6 +58,7 @@ from __future__ import print_function
58 # 2017-11-24 CH: - added support for xls files 58 # 2017-11-24 CH: - added support for xls files
59 # 2017-11-29 CH: - added support for xlsb files 59 # 2017-11-29 CH: - added support for xlsb files
60 # 2017-11-29 PL: - added support for RTF files (issue #223) 60 # 2017-11-29 PL: - added support for RTF files (issue #223)
  61 +# 2017-12-07 CH: - ensure rtf file is closed
61 62
62 __version__ = '0.52dev9' 63 __version__ = '0.52dev9'
63 64
@@ -68,6 +69,7 @@ __version__ = '0.52dev9' @@ -68,6 +69,7 @@ __version__ = '0.52dev9'
68 # TODO: Test with more interesting (real-world?) samples: xls, xlsx, xlsb, docx 69 # TODO: Test with more interesting (real-world?) samples: xls, xlsx, xlsb, docx
69 # TODO: Think about finding all external "connections" of documents, not just 70 # TODO: Think about finding all external "connections" of documents, not just
70 # DDE-Links 71 # DDE-Links
  72 +# TODO: avoid reading complete rtf file data into memory
71 73
72 #------------------------------------------------------------------------------ 74 #------------------------------------------------------------------------------
73 # REFERENCES: 75 # REFERENCES:
@@ -780,11 +782,13 @@ class RtfFieldParser(rtfobj.RtfParser): @@ -780,11 +782,13 @@ class RtfFieldParser(rtfobj.RtfParser):
780 self.current_destination.data += matchobject.group()[1] 782 self.current_destination.data += matchobject.group()[1]
781 783
782 784
  785 +RTF_START = b'\x7b\x5c\x72\x74' # == b'{\\rt' but does not mess up auto-indent
783 786
784 -def process_rtf(filepath, field_filter_mode=None): 787 +def process_rtf(file_handle, field_filter_mode=None):
785 log.debug('process_rtf') 788 log.debug('process_rtf')
786 all_fields = [] 789 all_fields = []
787 - data = open(filepath, 'rb').read() 790 + data = RTF_START + file_handle.read() # read complete file into memory!
  791 + file_handle.close()
788 rtfparser = RtfFieldParser(data) 792 rtfparser = RtfFieldParser(data)
789 rtfparser.parse() 793 rtfparser.parse()
790 all_fields = rtfparser.fields 794 all_fields = rtfparser.fields
@@ -814,9 +818,11 @@ def process_file(filepath, field_filter_mode=None): @@ -814,9 +818,11 @@ def process_file(filepath, field_filter_mode=None):
814 return process_xls(filepath) 818 return process_xls(filepath)
815 else: 819 else:
816 return process_doc(filepath) 820 return process_doc(filepath)
817 - elif open(filepath, 'rb').read(4) == b'{\\rt':  
818 - # This is a RTF file  
819 - return process_rtf(filepath, field_filter_mode) 821 + else:
  822 + with open(filepath, 'rb') as file_handle:
  823 + if file_handle.read(4) == RTF_START:
  824 + # This is a RTF file
  825 + return process_rtf(file_handle, field_filter_mode)
820 try: 826 try:
821 doctype = ooxml.get_type(filepath) 827 doctype = ooxml.get_type(filepath)
822 log.debug('Detected file type: {0}'.format(doctype)) 828 log.debug('Detected file type: {0}'.format(doctype))