Commit 2ae0ecd2664f29f74dd9b8cf8e6048c7ddfd4512

Authored by decalage2
1 parent 9153fb6c

rtfobj: backward-compatible API rtf_iter_objects (fixed issue #70)

Showing 1 changed file with 15 additions and 67 deletions
oletools/rtfobj.py
@@ -58,6 +58,7 @@ http://www.decalage.info/python/oletools @@ -58,6 +58,7 @@ http://www.decalage.info/python/oletools
58 # 2016-07-18 SL: - added Python 3.5 support 58 # 2016-07-18 SL: - added Python 3.5 support
59 # 2016-07-19 PL: - fixed Python 2.6-2.7 support 59 # 2016-07-19 PL: - fixed Python 2.6-2.7 support
60 # 2016-07-30 PL: - new API with class RtfObject 60 # 2016-07-30 PL: - new API with class RtfObject
  61 +# - backward-compatible API rtf_iter_objects (fixed issue #70)
61 62
62 __version__ = '0.50' 63 __version__ = '0.50'
63 64
@@ -558,26 +559,20 @@ class RtfObjParser(RtfParser): @@ -558,26 +559,20 @@ class RtfObjParser(RtfParser):
558 559
559 #=== FUNCTIONS =============================================================== 560 #=== FUNCTIONS ===============================================================
560 561
561 -# def rtf_iter_objects_old (filename, min_size=32):  
562 -# """  
563 -# Open a RTF file, extract each embedded object encoded in hexadecimal of  
564 -# size > min_size, yield the index of the object in the RTF file and its data  
565 -# in binary format.  
566 -# This is an iterator.  
567 -# """  
568 -# data = open(filename, 'rb').read()  
569 -# for m in re.finditer(PATTERN, data):  
570 -# found = m.group(0)  
571 -# orig_len = len(found)  
572 -# # remove all whitespace and line feeds:  
573 -# #NOTE: with Python 2.6+, we could use None instead of TRANSTABLE_NOCHANGE  
574 -# found = found.translate(TRANSTABLE_NOCHANGE, ' \t\r\n\f\v}')  
575 -# found = binascii.unhexlify(found)  
576 -# #print repr(found)  
577 -# if len(found)>min_size:  
578 -# yield m.start(), orig_len, found  
579 -  
def rtf_iter_objects_old(filename, min_size=32):
    """
    [DEPRECATED] Backward-compatible API, for applications using the old rtfobj:
    Open a RTF file, extract each embedded object encoded in hexadecimal of
    size > min_size, yield the index of the object in the RTF file and its data
    in binary format.
    This is an iterator.

    :param filename: path of the RTF file to read (opened in binary mode)
    :param min_size: objects whose extracted data is not strictly larger than
        this number of bytes are skipped, as in the old implementation
    :return: iterator yielding tuples (start index of the object in the RTF
        data, length of the object in the RTF data, extracted binary data)
    """
    # Use a context manager so the file handle is closed as soon as the data
    # has been read, instead of relying on garbage collection.
    with open(filename, 'rb') as rtf_file:
        data = rtf_file.read()
    rtfp = RtfObjParser(data)
    rtfp.parse()
    for rtfobj in rtfp.objects:
        rawdata = rtfobj.rawdata
        # NOTE(review): rawdata is presumably always set by the parser for
        # every object it collects; the None guard only avoids a TypeError
        # in len() if that assumption does not hold - TODO confirm.
        if rawdata is None or len(rawdata) <= min_size:
            # Honour the documented "size > min_size" contract of the old API.
            continue
        orig_len = rtfobj.end - rtfobj.start
        yield rtfobj.start, orig_len, rawdata
581 576
582 577
583 578
@@ -667,53 +662,6 @@ def process_file(container, filename, data, output_dir=None): @@ -667,53 +662,6 @@ def process_file(container, filename, data, output_dir=None):
667 662
668 663
669 664
670 - # print '-'*79  
671 - # print 'File: %r - %d bytes' % (filename, len(data))  
672 - # for index, orig_len, objdata in rtf_iter_objects(data):  
673 - # print 'found object size %d at index %08X - end %08X' % (len(objdata), index, index+orig_len)  
674 - # fname = '%s_object_%08X.raw' % (fname_prefix, index)  
675 - # print 'saving object to file %s' % fname  
676 - # open(fname, 'wb').write(objdata)  
677 - # # TODO: check if all hex data is extracted properly  
678 - #  
679 - # obj = OleObject()  
680 - # try:  
681 - # obj.parse(objdata)  
682 - # print 'extract file embedded in OLE object:'  
683 - # print 'format_id = %d' % obj.format_id  
684 - # print 'class name = %r' % obj.class_name  
685 - # print 'data size = %d' % obj.data_size  
686 - # # set a file extension according to the class name:  
687 - # class_name = obj.class_name.lower()  
688 - # if class_name.startswith('word'):  
689 - # ext = 'doc'  
690 - # elif class_name.startswith('package'):  
691 - # ext = 'package'  
692 - # else:  
693 - # ext = 'bin'  
694 - #  
695 - # fname = '%s_object_%08X.%s' % (fname_prefix, index, ext)  
696 - # print 'saving to file %s' % fname  
697 - # open(fname, 'wb').write(obj.data)  
698 - # if obj.class_name.lower() == 'package':  
699 - # print 'Parsing OLE Package'  
700 - # opkg = OleNativeStream(bindata=obj.data)  
701 - # print 'Filename = %r' % opkg.filename  
702 - # print 'Source path = %r' % opkg.src_path  
703 - # print 'Temp path = %r' % opkg.temp_path  
704 - # if opkg.filename:  
705 - # fname = '%s_%s' % (fname_prefix,  
706 - # sanitize_filename(opkg.filename))  
707 - # else:  
708 - # fname = '%s_object_%08X.noname' % (fname_prefix, index)  
709 - # print 'saving to file %s' % fname  
710 - # open(fname, 'wb').write(opkg.data)  
711 - # except:  
712 - # pass  
713 - # log.exception('*** Not an OLE 1.0 Object')  
714 -  
715 -  
716 -  
717 #=== MAIN ================================================================= 665 #=== MAIN =================================================================
718 666
719 def main(): 667 def main():