Commit 2ae0ecd2664f29f74dd9b8cf8e6048c7ddfd4512
1 parent: 9153fb6c
rtfobj: backward-compatible API rtf_iter_objects (fixed issue #70)
Showing 1 changed file with 15 additions and 67 deletions
oletools/rtfobj.py
| ... | ... | @@ -58,6 +58,7 @@ http://www.decalage.info/python/oletools |
| 58 | 58 | # 2016-07-18 SL: - added Python 3.5 support |
| 59 | 59 | # 2016-07-19 PL: - fixed Python 2.6-2.7 support |
| 60 | 60 | # 2016-07-30 PL: - new API with class RtfObject |
| 61 | +# - backward-compatible API rtf_iter_objects (fixed issue #70) | |
| 61 | 62 | |
| 62 | 63 | __version__ = '0.50' |
| 63 | 64 | |
| ... | ... | @@ -558,26 +559,20 @@ class RtfObjParser(RtfParser): |
| 558 | 559 | |
| 559 | 560 | #=== FUNCTIONS =============================================================== |
| 560 | 561 | |
| 561 | -# def rtf_iter_objects_old (filename, min_size=32): | |
| 562 | -# """ | |
| 563 | -# Open a RTF file, extract each embedded object encoded in hexadecimal of | |
| 564 | -# size > min_size, yield the index of the object in the RTF file and its data | |
| 565 | -# in binary format. | |
| 566 | -# This is an iterator. | |
| 567 | -# """ | |
| 568 | -# data = open(filename, 'rb').read() | |
| 569 | -# for m in re.finditer(PATTERN, data): | |
| 570 | -# found = m.group(0) | |
| 571 | -# orig_len = len(found) | |
| 572 | -# # remove all whitespace and line feeds: | |
| 573 | -# #NOTE: with Python 2.6+, we could use None instead of TRANSTABLE_NOCHANGE | |
| 574 | -# found = found.translate(TRANSTABLE_NOCHANGE, ' \t\r\n\f\v}') | |
| 575 | -# found = binascii.unhexlify(found) | |
| 576 | -# #print repr(found) | |
| 577 | -# if len(found)>min_size: | |
| 578 | -# yield m.start(), orig_len, found | |
| 579 | - | |
| 580 | -# TODO: backward-compatible API? | |
| 562 | +def rtf_iter_objects_old (filename, min_size=32): | |
| 563 | + """ | |
| 564 | + [DEPRECATED] Backward-compatible API, for applications using the old rtfobj: | |
| 565 | + Open a RTF file, extract each embedded object encoded in hexadecimal of | |
| 566 | + size > min_size, yield the index of the object in the RTF file and its data | |
| 567 | + in binary format. | |
| 568 | + This is an iterator. | |
| 569 | + """ | |
| 570 | + data = open(filename, 'rb').read() | |
| 571 | + rtfp = RtfObjParser(data) | |
| 572 | + rtfp.parse() | |
| 573 | + for rtfobj in rtfp.objects: | |
| 574 | + orig_len = rtfobj.end - rtfobj.start | |
| 575 | + yield rtfobj.start, orig_len, rtfobj.rawdata | |
| 581 | 576 | |
| 582 | 577 | |
| 583 | 578 | |
| ... | ... | @@ -667,53 +662,6 @@ def process_file(container, filename, data, output_dir=None): |
| 667 | 662 | |
| 668 | 663 | |
| 669 | 664 | |
| 670 | - # print '-'*79 | |
| 671 | - # print 'File: %r - %d bytes' % (filename, len(data)) | |
| 672 | - # for index, orig_len, objdata in rtf_iter_objects(data): | |
| 673 | - # print 'found object size %d at index %08X - end %08X' % (len(objdata), index, index+orig_len) | |
| 674 | - # fname = '%s_object_%08X.raw' % (fname_prefix, index) | |
| 675 | - # print 'saving object to file %s' % fname | |
| 676 | - # open(fname, 'wb').write(objdata) | |
| 677 | - # # TODO: check if all hex data is extracted properly | |
| 678 | - # | |
| 679 | - # obj = OleObject() | |
| 680 | - # try: | |
| 681 | - # obj.parse(objdata) | |
| 682 | - # print 'extract file embedded in OLE object:' | |
| 683 | - # print 'format_id = %d' % obj.format_id | |
| 684 | - # print 'class name = %r' % obj.class_name | |
| 685 | - # print 'data size = %d' % obj.data_size | |
| 686 | - # # set a file extension according to the class name: | |
| 687 | - # class_name = obj.class_name.lower() | |
| 688 | - # if class_name.startswith('word'): | |
| 689 | - # ext = 'doc' | |
| 690 | - # elif class_name.startswith('package'): | |
| 691 | - # ext = 'package' | |
| 692 | - # else: | |
| 693 | - # ext = 'bin' | |
| 694 | - # | |
| 695 | - # fname = '%s_object_%08X.%s' % (fname_prefix, index, ext) | |
| 696 | - # print 'saving to file %s' % fname | |
| 697 | - # open(fname, 'wb').write(obj.data) | |
| 698 | - # if obj.class_name.lower() == 'package': | |
| 699 | - # print 'Parsing OLE Package' | |
| 700 | - # opkg = OleNativeStream(bindata=obj.data) | |
| 701 | - # print 'Filename = %r' % opkg.filename | |
| 702 | - # print 'Source path = %r' % opkg.src_path | |
| 703 | - # print 'Temp path = %r' % opkg.temp_path | |
| 704 | - # if opkg.filename: | |
| 705 | - # fname = '%s_%s' % (fname_prefix, | |
| 706 | - # sanitize_filename(opkg.filename)) | |
| 707 | - # else: | |
| 708 | - # fname = '%s_object_%08X.noname' % (fname_prefix, index) | |
| 709 | - # print 'saving to file %s' % fname | |
| 710 | - # open(fname, 'wb').write(opkg.data) | |
| 711 | - # except: | |
| 712 | - # pass | |
| 713 | - # log.exception('*** Not an OLE 1.0 Object') | |
| 714 | - | |
| 715 | - | |
| 716 | - | |
| 717 | 665 | #=== MAIN ================================================================= |
| 718 | 666 | |
| 719 | 667 | def main(): | ... | ... |