Commit 6cc7f5b28ccd54e2226a23e904efdab295582f1f
1 parent
2ddca8a3
rtfobj: create is_rtf
Showing
1 changed file
with
53 additions
and
0 deletions
oletools/rtfobj.py
| @@ -303,11 +303,15 @@ if sys.version_info[0] <= 2: | @@ -303,11 +303,15 @@ if sys.version_info[0] <= 2: | ||
| 303 | BACKSLASH = '\\' | 303 | BACKSLASH = '\\' |
| 304 | BRACE_OPEN = '{' | 304 | BRACE_OPEN = '{' |
| 305 | BRACE_CLOSE = '}' | 305 | BRACE_CLOSE = '}' |
| 306 | + UNICODE_TYPE = unicode | ||
| 306 | else: | 307 | else: |
| 307 | # Python 3.x - Integers | 308 | # Python 3.x - Integers |
| 308 | BACKSLASH = ord('\\') | 309 | BACKSLASH = ord('\\') |
| 309 | BRACE_OPEN = ord('{') | 310 | BRACE_OPEN = ord('{') |
| 310 | BRACE_CLOSE = ord('}') | 311 | BRACE_CLOSE = ord('}') |
| 312 | + UNICODE_TYPE = str | ||
| 313 | + | ||
| 314 | +RTF_MAGIC = b'\x7b\\rt' # \x7b == b'{' but does not mess up auto-indent | ||
| 311 | 315 | ||
| 312 | 316 | ||
| 313 | #=== CLASSES ================================================================= | 317 | #=== CLASSES ================================================================= |
| @@ -673,7 +677,56 @@ def rtf_iter_objects(filename, min_size=32): | @@ -673,7 +677,56 @@ def rtf_iter_objects(filename, min_size=32): | ||
| 673 | yield obj.start, orig_len, obj.rawdata | 677 | yield obj.start, orig_len, obj.rawdata |
| 674 | 678 | ||
| 675 | 679 | ||
def is_rtf(arg, treat_str_as_data=False):
    """
    Determine whether a file / stream / byte container starts like an RTF file.

    The first len(RTF_MAGIC) bytes are compared case-insensitively with
    RTF_MAGIC.

    arg can be either a file name, a byte stream (located at start), a
    bytes/bytearray object, a list/tuple or any other iterable that yields
    single bytes (ints on Python 3, 1-char strings on Python 2).

    For the native `str` type it is not clear whether it holds a file name or
    the data read from a file (on Python 2 `str` is bytes, on Python 3 callers
    may still hand over decoded text). Argument treat_str_as_data clarifies.

    Note that a stream or iterator given as arg is consumed up to the length
    of the magic; it is not rewound afterwards.

    :param arg: file name, binary stream, bytes-like object or iterable
    :param treat_str_as_data: if True, a `str` argument is inspected as data;
                              otherwise (default) it is opened as a file name.
                              Python-2 `unicode` is always taken as file name.
    :returns: True if the data starts with the RTF magic, False otherwise
    :raises IOError: (OSError on Python 3) if a file name cannot be opened
    :raises TypeError: if arg is none of the above and is not iterable
    """
    magic_len = len(RTF_MAGIC)

    # Native str is the ambiguous case, so it has to be decided first.
    # Otherwise Python-3 str would always be caught by the file-name branch
    # below and treat_str_as_data would never be honoured.
    if treat_str_as_data and isinstance(arg, str):
        try:
            head = arg[:magic_len].encode('ascii', 'strict')
        except UnicodeError:
            # non-ascii characters can never be part of the magic
            return False
        return head.lower() == RTF_MAGIC

    # Every remaining text-like argument is a file name
    # (py2 unicode, py2 str, py3 str).
    if isinstance(arg, (UNICODE_TYPE, str)):
        with open(arg, 'rb') as reader:
            return reader.read(magic_len).lower() == RTF_MAGIC

    # Raw byte containers; on Python 2 `bytes` is `str` and was handled above.
    if isinstance(arg, (bytes, bytearray)):
        return arg[:magic_len].lower() == RTF_MAGIC

    # A stream (i.e. file-like object), expected to be opened in binary mode.
    if hasattr(arg, 'read'):
        return arg.read(magic_len).lower() == RTF_MAGIC

    # Anything else must be iterable. iter() returns an iterator unchanged,
    # so lists, tuples, sets, generators, ... are all handled the same way.
    byte_iter = iter(arg)
    for accepted in zip(RTF_MAGIC, RTF_MAGIC.upper()):
        # The None default covers input shorter than the magic: None is never
        # one of the accepted byte values.
        if next(byte_iter, None) not in accepted:
            return False
    return True    # checked the complete magic without a mismatch --> match
| 677 | 730 | ||
| 678 | 731 | ||
| 679 | def sanitize_filename(filename, replacement='_', max_length=200): | 732 | def sanitize_filename(filename, replacement='_', max_length=200): |