Commit d9da61845f1a6be240184a2557a40cbca2cd3b76
Committed by Philippe Lagadec
1 parent: f0a52502
msodde: find dde links in xlsb
Showing 1 changed file with 18 additions and 51 deletions
oletools/msodde.py
| @@ -722,62 +722,29 @@ def process_xlsx(filepath, filed_filter_mode=None): | @@ -722,62 +722,29 @@ def process_xlsx(filepath, filed_filter_mode=None): | ||
| 722 | link_info.append(elem.attrib['ddeTopic']) | 722 | link_info.append(elem.attrib['ddeTopic']) |
| 723 | dde_links.append(' '.join(link_info)) | 723 | dde_links.append(' '.join(link_info)) |
| 724 | 724 | ||
| 725 | + # binary parts, e.g. contained in .xlsb | ||
| 725 | for subfile, content_type, handle in parser.iter_non_xml(): | 726 | for subfile, content_type, handle in parser.iter_non_xml(): |
| 726 | - log.warning('File contains non-xml part {0} that could not be parsed' | ||
| 727 | - .format(subfile)) | ||
| 728 | - | ||
| 729 | - if content_type.startswith('application/vnd.ms-excel.'): | ||
| 730 | - dde_links.extend(process_xlsb(subfile, content_type, handle)) | ||
| 731 | - raise NotImplementedError('Continue reverse-engineering') | ||
| 732 | - else: | ||
| 733 | - magic = handle.read(len(olefile.MAGIC)) | ||
| 734 | - if magic == olefile.MAGIC: | ||
| 735 | - log.debug('found ole file {0} in excel ooxml'.format(subfile)) | ||
| 736 | - raise NotImplementedError('continue. need to reset stream') | 727 | + if content_type == 'application/vnd.openxmlformats-officedocument.' + \ |
| 728 | + 'spreadsheetml.printerSettings': | ||
| 729 | + continue # printer settings | ||
| 730 | + if not content_type.startswith('application/vnd.ms-excel.') and \ | ||
| 731 | + not content_type.startswith('application/vnd.ms-office.'): # pylint: disable=bad-indentation | ||
| 732 | + logging.warning('Unexpected content type: ' + content_type) | ||
| 733 | + # try parsing anyway | ||
| 734 | + | ||
| 735 | + logging.info('Parsing non-xml subfile {0} with content type {1}' | ||
| 736 | + .format(subfile, content_type)) | ||
| 737 | + for record in xls_parser.parse_xlsb_part(handle, content_type, subfile): | ||
| 738 | + logging.debug('{0}: {1}'.format(subfile, record)) | ||
| 739 | + if isinstance(record, xls_parser.XlsbBeginSupBook) and \ | ||
| 740 | + record.link_type == \ | ||
| 741 | + xls_parser.XlsbBeginSupBook.LINK_TYPE_DDE: | ||
| 742 | + dde_links.append('DDE-Link ' + record.string1 + ' ' + | ||
| 743 | + record.string2) | ||
| 737 | 744 | ||
| 738 | return u'\n'.join(dde_links) | 745 | return u'\n'.join(dde_links) |
| 739 | 746 | ||
| 740 | 747 | ||
| 741 | -def process_xlsb(subfile, content_type, stream): | ||
| 742 | - """ Process data contained in a binary part of an OOXML excel file | ||
| 743 | - | ||
| 744 | - lots of these in xlsb files | ||
| 745 | - | ||
| 746 | - Work in progress, always returns [] | ||
| 747 | - | ||
| 748 | - Format of these streams seems to roughly have record-like structure like | ||
| 749 | - xls files (see xls_parser.py), but have to guess a lot since I could not | ||
| 750 | - find proper description in [MS-XLSB] nor [ECMA-376] nor [MS-OE376]. The | ||
| 751 | - code here is reverse-engineered from comparing dde-test.xlsb and | ||
| 752 | - dde-test.xlsx | ||
| 753 | - | ||
| 754 | - The author of | ||
| 755 | - https://www.codeproject.com/Articles/15216/Office-bin-file-format seems to | ||
| 756 | - have tried to reverse-engineer several .bin streams based on the assumption | ||
| 757 | - they contain BIFF data. | ||
| 758 | - | ||
| 759 | - Anyway, need more test samples to get any reliable results from this. | ||
| 760 | - """ | ||
| 761 | - log.debug('Trying to parse subfile {0}'.format(subfile)) | ||
| 762 | - while True: | ||
| 763 | - data = stream.read(3) | ||
| 764 | - if not data: | ||
| 765 | - break # end of stream | ||
| 766 | - type = ord(data[0]) | ||
| 767 | - unknown = ord(data[1]) | ||
| 768 | - size = ord(data[2]) | ||
| 769 | - data = stream.read(size) | ||
| 770 | - | ||
| 771 | - log.debug('Record of type {0} unknown part {1} and size {2}: {3}' | ||
| 772 | - .format(type, unknown, size, data[:64])) | ||
| 773 | - if len(data) != size: | ||
| 774 | - log.warning('Stream in {0} does not seem to fit record structure. ' | ||
| 775 | - .format(subfile) + | ||
| 776 | - '(read {0} bytes but expected {1})' | ||
| 777 | - .format(len(data), size)) | ||
| 778 | - return [] | ||
| 779 | - | ||
| 780 | - | ||
| 781 | def process_file(filepath, field_filter_mode=None): | 748 | def process_file(filepath, field_filter_mode=None): |
| 782 | """ decides which of process_doc/x or process_xls/x to call """ | 749 | """ decides which of process_doc/x or process_xls/x to call """ |
| 783 | if olefile.isOleFile(filepath): | 750 | if olefile.isOleFile(filepath): |