Commit d9da61845f1a6be240184a2557a40cbca2cd3b76

Authored by Christian Herdtweck
Committed by Philippe Lagadec
1 parent f0a52502

msodde: find dde links in xlsb

Showing 1 changed file with 18 additions and 51 deletions
oletools/msodde.py
@@ -722,62 +722,29 @@ def process_xlsx(filepath, filed_filter_mode=None): @@ -722,62 +722,29 @@ def process_xlsx(filepath, filed_filter_mode=None):
722 link_info.append(elem.attrib['ddeTopic']) 722 link_info.append(elem.attrib['ddeTopic'])
723 dde_links.append(' '.join(link_info)) 723 dde_links.append(' '.join(link_info))
724 724
  725 + # binary parts, e.g. contained in .xlsb
725 for subfile, content_type, handle in parser.iter_non_xml(): 726 for subfile, content_type, handle in parser.iter_non_xml():
726 - log.warning('File contains non-xml part {0} that could not be parsed'  
727 - .format(subfile))  
728 -  
729 - if content_type.startswith('application/vnd.ms-excel.'):  
730 - dde_links.extend(process_xlsb(subfile, content_type, handle))  
731 - raise NotImplementedError('Continue reverse-engineering')  
732 - else:  
733 - magic = handle.read(len(olefile.MAGIC))  
734 - if magic == olefile.MAGIC:  
735 - log.debug('found ole file {0} in excel ooxml'.format(subfile))  
736 - raise NotImplementedError('continue. need to reset stream') 727 + if content_type == 'application/vnd.openxmlformats-officedocument.' + \
  728 + 'spreadsheetml.printerSettings':
  729 + continue # printer settings
  730 + if not content_type.startswith('application/vnd.ms-excel.') and \
  731 + not content_type.startswith('application/vnd.ms-office.'): # pylint: disable=bad-indentation
  732 + logging.warning('Unexpected content type: ' + content_type)
  733 + # try parsing anyway
  734 +
  735 + logging.info('Parsing non-xml subfile {0} with content type {1}'
  736 + .format(subfile, content_type))
  737 + for record in xls_parser.parse_xlsb_part(handle, content_type, subfile):
  738 + logging.debug('{0}: {1}'.format(subfile, record))
  739 + if isinstance(record, xls_parser.XlsbBeginSupBook) and \
  740 + record.link_type == \
  741 + xls_parser.XlsbBeginSupBook.LINK_TYPE_DDE:
  742 + dde_links.append('DDE-Link ' + record.string1 + ' ' +
  743 + record.string2)
737 744
738 return u'\n'.join(dde_links) 745 return u'\n'.join(dde_links)
739 746
740 747
741 -def process_xlsb(subfile, content_type, stream):  
742 - """ Process data contained in a binary part of an OOXML excel file  
743 -  
744 - lots of these in xlsb files  
745 -  
746 - Work in progress, always returns []  
747 -  
748 - Format of these streams seems to roughly have record-like structure like  
749 - xls files (see xls_parser.py), but have to guess a lot since I could not  
750 - find proper description in [MS-XLSB] nor [ECMA-376] nor [MS-OE376]. The  
751 - code here is reverse-engineered from comparing dde-test.xlsb and  
752 - dde-test.xlsx  
753 -  
754 - The author of  
755 - https://www.codeproject.com/Articles/15216/Office-bin-file-format seems to  
756 - have tried to reverse-engineer several .bin streams based on the assumption  
757 - they contain BIFF data.  
758 -  
759 - Anyway, need more test samples to get any reliable results from this.  
760 - """  
761 - log.debug('Trying to parse subfile {0}'.format(subfile))  
762 - while True:  
763 - data = stream.read(3)  
764 - if not data:  
765 - break # end of stream  
766 - type = ord(data[0])  
767 - unknown = ord(data[1])  
768 - size = ord(data[2])  
769 - data = stream.read(size)  
770 -  
771 - log.debug('Record of type {0} unknown part {1} and size {2}: {3}'  
772 - .format(type, unknown, size, data[:64]))  
773 - if len(data) != size:  
774 - log.warning('Stream in {0} does not seem to fit record structure. '  
775 - .format(subfile) +  
776 - '(read {0} bytes but expected {1})'  
777 - .format(len(data), size))  
778 - return []  
779 -  
780 -  
781 def process_file(filepath, field_filter_mode=None): 748 def process_file(filepath, field_filter_mode=None):
782 """ decides which of process_doc/x or process_xls/x to call """ 749 """ decides which of process_doc/x or process_xls/x to call """
783 if olefile.isOleFile(filepath): 750 if olefile.isOleFile(filepath):