Commit 6cc7f5b28ccd54e2226a23e904efdab295582f1f

Authored by Christian Herdtweck
1 parent 2ddca8a3

rtfobj: create is_rtf

Showing 1 changed file with 53 additions and 0 deletions
oletools/rtfobj.py
... ... @@ -303,11 +303,15 @@ if sys.version_info[0] <= 2:
303 303 BACKSLASH = '\\'
304 304 BRACE_OPEN = '{'
305 305 BRACE_CLOSE = '}'
  306 + UNICODE_TYPE = unicode
306 307 else:
307 308 # Python 3.x - Integers
308 309 BACKSLASH = ord('\\')
309 310 BRACE_OPEN = ord('{')
310 311 BRACE_CLOSE = ord('}')
  312 + UNICODE_TYPE = str
  313 +
  314 +RTF_MAGIC = b'\x7b\\rt' # \x7b == b'{' but does not mess up auto-indent
311 315  
312 316  
313 317 #=== CLASSES =================================================================
... ... @@ -673,7 +677,56 @@ def rtf_iter_objects(filename, min_size=32):
673 677 yield obj.start, orig_len, obj.rawdata
674 678  
675 679  
def is_rtf(arg, treat_str_as_data=False):
    """ determine whether given file / stream / array represents an rtf file

    Only the first len(RTF_MAGIC) bytes are inspected; they are compared
    case-insensitively against RTF_MAGIC.

    arg can be one of:
      - a file name (text string): the file is opened in binary mode and its
        first bytes are read
      - bytes (python 3) or a bytearray: checked directly
      - a byte stream, i.e. any object with a `read` method, located at the
        start of the data. The stream is read from and NOT rewound afterwards
      - a list/tuple or any other iterable/iterator that contains bytes in
        the form that iterating over RTF_MAGIC yields. At most
        len(RTF_MAGIC) items are consumed

    For str it is not clear whether data is a file name or the data read from
    it (at least for py2-str which is bytes). Argument treat_str_as_data
    clarifies: if True, a str is checked as data; if False (default) it is
    used as a file name. A py2-unicode is always used as a file name.

    Returns True if the data starts with the rtf magic, False otherwise
    (including data that is too short or str data that is not pure ascii).
    Errors from opening/reading a file are not caught here; an arg that is
    none of the above and not iterable raises TypeError.
    """
    magic_len = len(RTF_MAGIC)

    # str explicitly declared to be data. This has to be tested before the
    # file-name case, otherwise a py3-str would always be opened as a file
    if isinstance(arg, str) and treat_str_as_data:
        try:
            head = arg[:magic_len].encode('ascii', errors='strict')
        except UnicodeError:
            return False    # non-ascii data cannot start with the magic
        return head.lower() == RTF_MAGIC

    # any remaining string (py2: str or unicode, py3: str) is a file name
    if isinstance(arg, (str, UNICODE_TYPE)):
        with open(arg, 'rb') as reader:
            return reader.read(magic_len).lower() == RTF_MAGIC

    # raw data; py2-bytes is str and therefore never gets here
    if isinstance(arg, (bytes, bytearray)):
        return arg[:magic_len].lower() == RTF_MAGIC

    if hasattr(arg, 'read'):      # a stream (i.e. file-like object)
        return arg.read(magic_len).lower() == RTF_MAGIC

    # everything else has to be iterable; iter() returns iterators unchanged
    # and creates one for plain iterables (list, tuple, set, ...)
    items = iter(arg)
    for magic_byte, upper_cased in zip(RTF_MAGIC, RTF_MAGIC.upper()):
        # default None: an exhausted iterator is a mismatch (input too short)
        if next(items, None) not in (magic_byte, upper_cased):
            return False
    return True   # checked the complete magic without returning False --> match
677 730  
678 731  
679 732 def sanitize_filename(filename, replacement='_', max_length=200):
... ...