Commit 2ae0ecd2664f29f74dd9b8cf8e6048c7ddfd4512
1 parent 9153fb6c
rtfobj: backward-compatible API rtf_iter_objects (fixed issue #70)
Showing 1 changed file with 15 additions and 67 deletions
oletools/rtfobj.py
| @@ -58,6 +58,7 @@ http://www.decalage.info/python/oletools | @@ -58,6 +58,7 @@ http://www.decalage.info/python/oletools | ||
| 58 | # 2016-07-18 SL: - added Python 3.5 support | 58 | # 2016-07-18 SL: - added Python 3.5 support |
| 59 | # 2016-07-19 PL: - fixed Python 2.6-2.7 support | 59 | # 2016-07-19 PL: - fixed Python 2.6-2.7 support |
| 60 | # 2016-07-30 PL: - new API with class RtfObject | 60 | # 2016-07-30 PL: - new API with class RtfObject |
| 61 | +# - backward-compatible API rtf_iter_objects (fixed issue #70) | ||
| 61 | 62 | ||
| 62 | __version__ = '0.50' | 63 | __version__ = '0.50' |
| 63 | 64 | ||
| @@ -558,26 +559,20 @@ class RtfObjParser(RtfParser): | @@ -558,26 +559,20 @@ class RtfObjParser(RtfParser): | ||
| 558 | 559 | ||
| 559 | #=== FUNCTIONS =============================================================== | 560 | #=== FUNCTIONS =============================================================== |
| 560 | 561 | ||
| 561 | -# def rtf_iter_objects_old (filename, min_size=32): | ||
| 562 | -# """ | ||
| 563 | -# Open a RTF file, extract each embedded object encoded in hexadecimal of | ||
| 564 | -# size > min_size, yield the index of the object in the RTF file and its data | ||
| 565 | -# in binary format. | ||
| 566 | -# This is an iterator. | ||
| 567 | -# """ | ||
| 568 | -# data = open(filename, 'rb').read() | ||
| 569 | -# for m in re.finditer(PATTERN, data): | ||
| 570 | -# found = m.group(0) | ||
| 571 | -# orig_len = len(found) | ||
| 572 | -# # remove all whitespace and line feeds: | ||
| 573 | -# #NOTE: with Python 2.6+, we could use None instead of TRANSTABLE_NOCHANGE | ||
| 574 | -# found = found.translate(TRANSTABLE_NOCHANGE, ' \t\r\n\f\v}') | ||
| 575 | -# found = binascii.unhexlify(found) | ||
| 576 | -# #print repr(found) | ||
| 577 | -# if len(found)>min_size: | ||
| 578 | -# yield m.start(), orig_len, found | ||
| 579 | - | ||
| 580 | -# TODO: backward-compatible API? | 562 | +def rtf_iter_objects_old (filename, min_size=32): |
| 563 | + """ | ||
| 564 | + [DEPRECATED] Backward-compatible API, for applications using the old rtfobj: | ||
| 565 | + Open an RTF file, extract each embedded object encoded in hexadecimal of | ||
| 566 | + size > min_size, yield the index of the object in the RTF file and its data | ||
| 567 | + in binary format. | ||
| 568 | + This is an iterator. | ||
| 569 | + """ | ||
| 570 | + data = open(filename, 'rb').read() | ||
| 571 | + rtfp = RtfObjParser(data) | ||
| 572 | + rtfp.parse() | ||
| 573 | + for rtfobj in rtfp.objects: | ||
| 574 | + orig_len = rtfobj.end - rtfobj.start | ||
| 575 | + yield rtfobj.start, orig_len, rtfobj.rawdata | ||
| 581 | 576 | ||
| 582 | 577 | ||
| 583 | 578 | ||
| @@ -667,53 +662,6 @@ def process_file(container, filename, data, output_dir=None): | @@ -667,53 +662,6 @@ def process_file(container, filename, data, output_dir=None): | ||
| 667 | 662 | ||
| 668 | 663 | ||
| 669 | 664 | ||
| 670 | - # print '-'*79 | ||
| 671 | - # print 'File: %r - %d bytes' % (filename, len(data)) | ||
| 672 | - # for index, orig_len, objdata in rtf_iter_objects(data): | ||
| 673 | - # print 'found object size %d at index %08X - end %08X' % (len(objdata), index, index+orig_len) | ||
| 674 | - # fname = '%s_object_%08X.raw' % (fname_prefix, index) | ||
| 675 | - # print 'saving object to file %s' % fname | ||
| 676 | - # open(fname, 'wb').write(objdata) | ||
| 677 | - # # TODO: check if all hex data is extracted properly | ||
| 678 | - # | ||
| 679 | - # obj = OleObject() | ||
| 680 | - # try: | ||
| 681 | - # obj.parse(objdata) | ||
| 682 | - # print 'extract file embedded in OLE object:' | ||
| 683 | - # print 'format_id = %d' % obj.format_id | ||
| 684 | - # print 'class name = %r' % obj.class_name | ||
| 685 | - # print 'data size = %d' % obj.data_size | ||
| 686 | - # # set a file extension according to the class name: | ||
| 687 | - # class_name = obj.class_name.lower() | ||
| 688 | - # if class_name.startswith('word'): | ||
| 689 | - # ext = 'doc' | ||
| 690 | - # elif class_name.startswith('package'): | ||
| 691 | - # ext = 'package' | ||
| 692 | - # else: | ||
| 693 | - # ext = 'bin' | ||
| 694 | - # | ||
| 695 | - # fname = '%s_object_%08X.%s' % (fname_prefix, index, ext) | ||
| 696 | - # print 'saving to file %s' % fname | ||
| 697 | - # open(fname, 'wb').write(obj.data) | ||
| 698 | - # if obj.class_name.lower() == 'package': | ||
| 699 | - # print 'Parsing OLE Package' | ||
| 700 | - # opkg = OleNativeStream(bindata=obj.data) | ||
| 701 | - # print 'Filename = %r' % opkg.filename | ||
| 702 | - # print 'Source path = %r' % opkg.src_path | ||
| 703 | - # print 'Temp path = %r' % opkg.temp_path | ||
| 704 | - # if opkg.filename: | ||
| 705 | - # fname = '%s_%s' % (fname_prefix, | ||
| 706 | - # sanitize_filename(opkg.filename)) | ||
| 707 | - # else: | ||
| 708 | - # fname = '%s_object_%08X.noname' % (fname_prefix, index) | ||
| 709 | - # print 'saving to file %s' % fname | ||
| 710 | - # open(fname, 'wb').write(opkg.data) | ||
| 711 | - # except: | ||
| 712 | - # pass | ||
| 713 | - # log.exception('*** Not an OLE 1.0 Object') | ||
| 714 | - | ||
| 715 | - | ||
| 716 | - | ||
| 717 | #=== MAIN ================================================================= | 665 | #=== MAIN ================================================================= |
| 718 | 666 | ||
| 719 | def main(): | 667 | def main(): |