Commit d9da61845f1a6be240184a2557a40cbca2cd3b76
Committed by Philippe Lagadec
1 parent f0a52502
msodde: find dde links in xlsb
Showing 1 changed file with 18 additions and 51 deletions
oletools/msodde.py
| ... | ... | @@ -722,62 +722,29 @@ def process_xlsx(filepath, filed_filter_mode=None): |
| 722 | 722 | link_info.append(elem.attrib['ddeTopic']) |
| 723 | 723 | dde_links.append(' '.join(link_info)) |
| 724 | 724 | |
| 725 | + # binary parts, e.g. contained in .xlsb | |
| 725 | 726 | for subfile, content_type, handle in parser.iter_non_xml(): |
| 726 | - log.warning('File contains non-xml part {0} that could not be parsed' | |
| 727 | - .format(subfile)) | |
| 728 | - | |
| 729 | - if content_type.startswith('application/vnd.ms-excel.'): | |
| 730 | - dde_links.extend(process_xlsb(subfile, content_type, handle)) | |
| 731 | - raise NotImplementedError('Continue reverse-engineering') | |
| 732 | - else: | |
| 733 | - magic = handle.read(len(olefile.MAGIC)) | |
| 734 | - if magic == olefile.MAGIC: | |
| 735 | - log.debug('found ole file {0} in excel ooxml'.format(subfile)) | |
| 736 | - raise NotImplementedError('continue. need to reset stream') | |
| 727 | + if content_type == 'application/vnd.openxmlformats-officedocument.' + \ | |
| 728 | + 'spreadsheetml.printerSettings': | |
| 729 | + continue # printer settings | |
| 730 | + if not content_type.startswith('application/vnd.ms-excel.') and \ | |
| 731 | + not content_type.startswith('application/vnd.ms-office.'): # pylint: disable=bad-indentation | |
| 732 | + logging.warning('Unexpected content type: ' + content_type) | |
| 733 | + # try parsing anyway | |
| 734 | + | |
| 735 | + logging.info('Parsing non-xml subfile {0} with content type {1}' | |
| 736 | + .format(subfile, content_type)) | |
| 737 | + for record in xls_parser.parse_xlsb_part(handle, content_type, subfile): | |
| 738 | + logging.debug('{0}: {1}'.format(subfile, record)) | |
| 739 | + if isinstance(record, xls_parser.XlsbBeginSupBook) and \ | |
| 740 | + record.link_type == \ | |
| 741 | + xls_parser.XlsbBeginSupBook.LINK_TYPE_DDE: | |
| 742 | + dde_links.append('DDE-Link ' + record.string1 + ' ' + | |
| 743 | + record.string2) | |
| 737 | 744 | |
| 738 | 745 | return u'\n'.join(dde_links) |
| 739 | 746 | |
| 740 | 747 | |
| 741 | -def process_xlsb(subfile, content_type, stream): | |
| 742 | - """ Process data contained in a binary part of an OOXML excel file | |
| 743 | - | |
| 744 | - lots of these in xlsb files | |
| 745 | - | |
| 746 | - Work in progress, always returns [] | |
| 747 | - | |
| 748 | - Format of these streams seems to roughly have record-like structure like | |
| 749 | - xls files (see xls_parser.py), but have to guess a lot since I could not | |
| 750 | - find proper description in [MS-XLSB] nor [ECMA-376] nor [MS-OE376]. The | |
| 751 | - code here is reverse-engineered from comparing dde-test.xlsb and | |
| 752 | - dde-test.xlsx | |
| 753 | - | |
| 754 | - The author of | |
| 755 | - https://www.codeproject.com/Articles/15216/Office-bin-file-format seems to | |
| 756 | - have tried to reverse-engineer several .bin streams based on the assumption | |
| 757 | - they contain BIFF data. | |
| 758 | - | |
| 759 | - Anyway, need more test samples to get any reliable results from this. | |
| 760 | - """ | |
| 761 | - log.debug('Trying to parse subfile {0}'.format(subfile)) | |
| 762 | - while True: | |
| 763 | - data = stream.read(3) | |
| 764 | - if not data: | |
| 765 | - break # end of stream | |
| 766 | - type = ord(data[0]) | |
| 767 | - unknown = ord(data[1]) | |
| 768 | - size = ord(data[2]) | |
| 769 | - data = stream.read(size) | |
| 770 | - | |
| 771 | - log.debug('Record of type {0} unknown part {1} and size {2}: {3}' | |
| 772 | - .format(type, unknown, size, data[:64])) | |
| 773 | - if len(data) != size: | |
| 774 | - log.warning('Stream in {0} does not seem to fit record structure. ' | |
| 775 | - .format(subfile) + | |
| 776 | - '(read {0} bytes but expected {1})' | |
| 777 | - .format(len(data), size)) | |
| 778 | - return [] | |
| 779 | - | |
| 780 | - | |
| 781 | 748 | def process_file(filepath, field_filter_mode=None): |
| 782 | 749 | """ decides which of process_doc/x or process_xls/x to call """ |
| 783 | 750 | if olefile.isOleFile(filepath): | ... | ... |