Commit 2ae0ecd2664f29f74dd9b8cf8e6048c7ddfd4512

Authored by decalage2
1 parent 9153fb6c

rtfobj: backward-compatible API rtf_iter_objects (fixed issue #70)

Showing 1 changed file with 15 additions and 67 deletions
oletools/rtfobj.py
... ... @@ -58,6 +58,7 @@ http://www.decalage.info/python/oletools
58 58 # 2016-07-18 SL: - added Python 3.5 support
59 59 # 2016-07-19 PL: - fixed Python 2.6-2.7 support
60 60 # 2016-07-30 PL: - new API with class RtfObject
  61 +# - backward-compatible API rtf_iter_objects (fixed issue #70)
61 62  
62 63 __version__ = '0.50'
63 64  
... ... @@ -558,26 +559,20 @@ class RtfObjParser(RtfParser):
558 559  
559 560 #=== FUNCTIONS ===============================================================
560 561  
561   -# def rtf_iter_objects_old (filename, min_size=32):
562   -# """
563   -# Open a RTF file, extract each embedded object encoded in hexadecimal of
564   -# size > min_size, yield the index of the object in the RTF file and its data
565   -# in binary format.
566   -# This is an iterator.
567   -# """
568   -# data = open(filename, 'rb').read()
569   -# for m in re.finditer(PATTERN, data):
570   -# found = m.group(0)
571   -# orig_len = len(found)
572   -# # remove all whitespace and line feeds:
573   -# #NOTE: with Python 2.6+, we could use None instead of TRANSTABLE_NOCHANGE
574   -# found = found.translate(TRANSTABLE_NOCHANGE, ' \t\r\n\f\v}')
575   -# found = binascii.unhexlify(found)
576   -# #print repr(found)
577   -# if len(found)>min_size:
578   -# yield m.start(), orig_len, found
579   -
580   -# TODO: backward-compatible API?
def rtf_iter_objects_old (filename, min_size=32):
    """
    [DEPRECATED] Backward-compatible API, for applications using the old rtfobj:
    Open a RTF file, run RtfObjParser on its content, and yield one tuple for
    each object found by the parser.
    This is an iterator (generator): the file is only read and parsed when
    the first item is requested.

    :param filename: path of the RTF file to open on disk (read in binary mode)
    :param min_size: kept in the signature for backward compatibility only.
        NOTE: this implementation does not use it, so no size filtering is
        applied here (the old regex-based version only yielded objects with
        more than min_size bytes of decoded data).
    :returns: iterator yielding tuples (start, orig_len, rawdata), where
        start is rtfobj.start, orig_len is rtfobj.end - rtfobj.start, and
        rawdata is rtfobj.rawdata, for each object in RtfObjParser.objects.
        (start/end are presumably offsets in the RTF data - confirm against
        RtfObjParser.)
    """
    # Use a context manager so the file handle is closed deterministically,
    # instead of relying on garbage collection of the anonymous file object.
    with open(filename, 'rb') as f:
        data = f.read()
    rtfp = RtfObjParser(data)
    rtfp.parse()
    for rtfobj in rtfp.objects:
        # length of the span reported by the parser for this object
        orig_len = rtfobj.end - rtfobj.start
        yield rtfobj.start, orig_len, rtfobj.rawdata
581 576  
582 577  
583 578  
... ... @@ -667,53 +662,6 @@ def process_file(container, filename, data, output_dir=None):
667 662  
668 663  
669 664  
670   - # print '-'*79
671   - # print 'File: %r - %d bytes' % (filename, len(data))
672   - # for index, orig_len, objdata in rtf_iter_objects(data):
673   - # print 'found object size %d at index %08X - end %08X' % (len(objdata), index, index+orig_len)
674   - # fname = '%s_object_%08X.raw' % (fname_prefix, index)
675   - # print 'saving object to file %s' % fname
676   - # open(fname, 'wb').write(objdata)
677   - # # TODO: check if all hex data is extracted properly
678   - #
679   - # obj = OleObject()
680   - # try:
681   - # obj.parse(objdata)
682   - # print 'extract file embedded in OLE object:'
683   - # print 'format_id = %d' % obj.format_id
684   - # print 'class name = %r' % obj.class_name
685   - # print 'data size = %d' % obj.data_size
686   - # # set a file extension according to the class name:
687   - # class_name = obj.class_name.lower()
688   - # if class_name.startswith('word'):
689   - # ext = 'doc'
690   - # elif class_name.startswith('package'):
691   - # ext = 'package'
692   - # else:
693   - # ext = 'bin'
694   - #
695   - # fname = '%s_object_%08X.%s' % (fname_prefix, index, ext)
696   - # print 'saving to file %s' % fname
697   - # open(fname, 'wb').write(obj.data)
698   - # if obj.class_name.lower() == 'package':
699   - # print 'Parsing OLE Package'
700   - # opkg = OleNativeStream(bindata=obj.data)
701   - # print 'Filename = %r' % opkg.filename
702   - # print 'Source path = %r' % opkg.src_path
703   - # print 'Temp path = %r' % opkg.temp_path
704   - # if opkg.filename:
705   - # fname = '%s_%s' % (fname_prefix,
706   - # sanitize_filename(opkg.filename))
707   - # else:
708   - # fname = '%s_object_%08X.noname' % (fname_prefix, index)
709   - # print 'saving to file %s' % fname
710   - # open(fname, 'wb').write(opkg.data)
711   - # except:
712   - # pass
713   - # log.exception('*** Not an OLE 1.0 Object')
714   -
715   -
716   -
717 665 #=== MAIN =================================================================
718 666  
719 667 def main():
... ...