Commit d9da61845f1a6be240184a2557a40cbca2cd3b76

Authored by Christian Herdtweck
Committed by Philippe Lagadec
1 parent f0a52502

msodde: find dde links in xlsb

Showing 1 changed file with 18 additions and 51 deletions
oletools/msodde.py
... ... @@ -722,62 +722,29 @@ def process_xlsx(filepath, filed_filter_mode=None):
722 722 link_info.append(elem.attrib['ddeTopic'])
723 723 dde_links.append(' '.join(link_info))
724 724  
  725 + # binary parts, e.g. contained in .xlsb
725 726 for subfile, content_type, handle in parser.iter_non_xml():
726   - log.warning('File contains non-xml part {0} that could not be parsed'
727   - .format(subfile))
728   -
729   - if content_type.startswith('application/vnd.ms-excel.'):
730   - dde_links.extend(process_xlsb(subfile, content_type, handle))
731   - raise NotImplementedError('Continue reverse-engineering')
732   - else:
733   - magic = handle.read(len(olefile.MAGIC))
734   - if magic == olefile.MAGIC:
735   - log.debug('found ole file {0} in excel ooxml'.format(subfile))
736   - raise NotImplementedError('continue. need to reset stream')
  727 + if content_type == 'application/vnd.openxmlformats-officedocument.' + \
  728 + 'spreadsheetml.printerSettings':
  729 + continue # printer settings
  730 + if not content_type.startswith('application/vnd.ms-excel.') and \
  731 + not content_type.startswith('application/vnd.ms-office.'): # pylint: disable=bad-indentation
  732 + logging.warning('Unexpected content type: ' + content_type)
  733 + # try parsing anyway
  734 +
  735 + logging.info('Parsing non-xml subfile {0} with content type {1}'
  736 + .format(subfile, content_type))
  737 + for record in xls_parser.parse_xlsb_part(handle, content_type, subfile):
  738 + logging.debug('{0}: {1}'.format(subfile, record))
  739 + if isinstance(record, xls_parser.XlsbBeginSupBook) and \
  740 + record.link_type == \
  741 + xls_parser.XlsbBeginSupBook.LINK_TYPE_DDE:
  742 + dde_links.append('DDE-Link ' + record.string1 + ' ' +
  743 + record.string2)
737 744  
738 745 return u'\n'.join(dde_links)
739 746  
740 747  
741   -def process_xlsb(subfile, content_type, stream):
742   - """ Process data contained in a binary part of an OOXML excel file
743   -
744   - lots of these in xlsb files
745   -
746   - Work in progress, always returns []
747   -
748   - The format of these streams seems to have a roughly record-like structure,
749   - like xls files (see xls_parser.py), but a lot is guesswork since I could not
750   - find a proper description in [MS-XLSB], [ECMA-376] or [MS-OE376]. The
751   - code here is reverse-engineered from comparing dde-test.xlsb and
752   - dde-test.xlsx.
753   -
754   - The author of
755   - https://www.codeproject.com/Articles/15216/Office-bin-file-format seems to
756   - have tried to reverse-engineer several .bin streams based on the assumption
757   - they contain BIFF data.
758   -
759   - Anyway, need more test samples to get any reliable results from this.
760   - """
761   - log.debug('Trying to parse subfile {0}'.format(subfile))
762   - while True:
763   - data = stream.read(3)
764   - if not data:
765   - break # end of stream
766   - type = ord(data[0])
767   - unknown = ord(data[1])
768   - size = ord(data[2])
769   - data = stream.read(size)
770   -
771   - log.debug('Record of type {0} unknown part {1} and size {2}: {3}'
772   - .format(type, unknown, size, data[:64]))
773   - if len(data) != size:
774   - log.warning('Stream in {0} does not seem to fit record structure. '
775   - .format(subfile) +
776   - '(read {0} bytes but expected {1})'
777   - .format(len(data), size))
778   - return []
779   -
780   -
781 748 def process_file(filepath, field_filter_mode=None):
782 749 """ decides which of process_doc/x or process_xls/x to call """
783 750 if olefile.isOleFile(filepath):
... ...