Commit 6cc7f5b28ccd54e2226a23e904efdab295582f1f
1 parent
2ddca8a3
rtfobj: create is_rtf
Showing
1 changed file
with
53 additions
and
0 deletions
oletools/rtfobj.py
| ... | ... | @@ -303,11 +303,15 @@ if sys.version_info[0] <= 2: |
| 303 | 303 | BACKSLASH = '\\' |
| 304 | 304 | BRACE_OPEN = '{' |
| 305 | 305 | BRACE_CLOSE = '}' |
| 306 | + UNICODE_TYPE = unicode | |
| 306 | 307 | else: |
| 307 | 308 | # Python 3.x - Integers |
| 308 | 309 | BACKSLASH = ord('\\') |
| 309 | 310 | BRACE_OPEN = ord('{') |
| 310 | 311 | BRACE_CLOSE = ord('}') |
| 312 | + UNICODE_TYPE = str | |
| 313 | + | |
| 314 | +RTF_MAGIC = b'\x7b\\rt' # \x7b == b'{' but does not mess up auto-indent | |
| 311 | 315 | |
| 312 | 316 | |
| 313 | 317 | #=== CLASSES ================================================================= |
| ... | ... | @@ -673,7 +677,56 @@ def rtf_iter_objects(filename, min_size=32): |
| 673 | 677 | yield obj.start, orig_len, obj.rawdata |
| 674 | 678 | |
| 675 | 679 | |
def is_rtf(arg, treat_str_as_data=False):
    """Determine whether a file / stream / byte sequence starts like an RTF file.

    Only the first ``len(RTF_MAGIC)`` bytes are inspected; they are compared
    case-insensitively against ``RTF_MAGIC``.

    :param arg: what to test. Handled according to its type:

        * unicode text (``unicode`` on Python 2, ``str`` on Python 3): always
          taken as a file name; the file is opened in binary mode and its
          first bytes are read.
        * ``bytes`` (Python 3) or ``bytearray``: taken as the data itself.
        * Python 2 ``str`` (which is bytes): ambiguous, see
          ``treat_str_as_data``.
        * any object with a ``read`` attribute: taken as a stream; it must
          be positioned at its start and is advanced by the bytes read.
        * anything else: taken as an iterable whose first elements are
          compared with the items obtained by iterating over ``RTF_MAGIC``
          (integers on Python 3, 1-character strings on Python 2). If an
          iterator is passed, the compared elements are consumed.
    :param treat_str_as_data: only relevant for a Python 2 ``str``: if True
        the string is the data to test, otherwise (default) it is a file name.
    :returns: True if the data starts with the RTF magic, False otherwise
        (including when there is less data than the magic is long).

    Errors raised by ``open()`` for file names propagate to the caller.
    """
    magic_len = len(RTF_MAGIC)

    # raw binary data: Python 3 bytes (on Python 2 bytes is str, which is
    # handled separately below) or a bytearray on either version
    is_py3_bytes = isinstance(arg, bytes) and not isinstance(arg, str)
    if is_py3_bytes or isinstance(arg, bytearray):
        return arg[:magic_len].lower() == RTF_MAGIC

    is_text = isinstance(arg, UNICODE_TYPE)
    if isinstance(arg, str) and not is_text and treat_str_as_data:
        # Python 2 byte string that the caller declared to be data.
        # encode() implicitly decodes as ascii first, so non-ascii data ends
        # up in the UnicodeError handler and cannot be the magic anyway.
        try:
            head = arg[:magic_len].encode('ascii', errors='strict')
        except UnicodeError:
            return False
        return head.lower() == RTF_MAGIC

    if is_text or isinstance(arg, str):
        # everything string-like that is left is a file name
        with open(arg, 'rb') as reader:
            return reader.read(magic_len).lower() == RTF_MAGIC

    if hasattr(arg, 'read'):      # a stream (i.e. file-like object)
        return arg.read(magic_len).lower() == RTF_MAGIC

    # Generic iterable (list, tuple, generator, ...). iter() returns an
    # iterator unchanged, so this works for iterators and for re-iterable
    # containers alike.
    items = iter(arg)
    exhausted = object()   # sentinel: never equal to any byte of the magic
    for magic_byte, upper_cased in zip(RTF_MAGIC, RTF_MAGIC.upper()):
        if next(items, exhausted) not in (magic_byte, upper_cased):
            return False
    return True   # checked the complete magic without a mismatch --> match
| 677 | 730 | |
| 678 | 731 | |
| 679 | 732 | def sanitize_filename(filename, replacement='_', max_length=200): | ... | ... |