Commit 8a2883660061b35ed3dd08ed0eb8f1f9a8e5378e
Committed by
Philippe Lagadec
1 parent
ed6b0de4
msodde: first integration of simple xls parsing
Showing
1 changed file
with
23 additions
and
2 deletions
oletools/msodde.py
| @@ -92,6 +92,7 @@ if not _parent_dir in sys.path: | @@ -92,6 +92,7 @@ if not _parent_dir in sys.path: | ||
| 92 | 92 | ||
| 93 | from oletools.thirdparty import olefile | 93 | from oletools.thirdparty import olefile |
| 94 | import oletools.ooxml as ooxml | 94 | import oletools.ooxml as ooxml |
| 95 | +from oletools import xls_parser | ||
| 95 | 96 | ||
| 96 | # === PYTHON 2+3 SUPPORT ====================================================== | 97 | # === PYTHON 2+3 SUPPORT ====================================================== |
| 97 | 98 | ||
| @@ -501,7 +502,7 @@ def process_ole(filepath): | @@ -501,7 +502,7 @@ def process_ole(filepath): | ||
| 501 | find dde links in ole file | 502 | find dde links in ole file |
| 502 | 503 | ||
| 503 | like process_xml, returns a concatenated unicode string of dde links or | 504 | like process_xml, returns a concatenated unicode string of dde links or |
| 504 | - empty if none were found. dde-links will still being with the dde[auto] key | 505 | + empty if none were found. dde-links will still begin with the dde[auto] key |
| 505 | word (possibly after some whitespace) | 506 | word (possibly after some whitespace) |
| 506 | """ | 507 | """ |
| 507 | log.debug('process_ole') | 508 | log.debug('process_ole') |
| @@ -512,6 +513,23 @@ def process_ole(filepath): | @@ -512,6 +513,23 @@ def process_ole(filepath): | ||
| 512 | return u'\n'.join(text_parts) | 513 | return u'\n'.join(text_parts) |
| 513 | 514 | ||
| 514 | 515 | ||
def process_xls(filepath):
    """
    find dde links in excel ole file

    Iterates over the streams of the xls file, looks only at workbook streams
    and collects the virt_path of every SupBook record whose
    support_link_type is LINK_TYPE_OLE_DDE or LINK_TYPE_EXTERNAL.

    returns a unicode string with one collected path per line, or an empty
    string if none were found.
    """
    log.debug('process_xls')

    # link types that are reported; built once instead of once per record
    wanted_link_types = (
        xls_parser.XlsRecordSupBook.LINK_TYPE_OLE_DDE,
        xls_parser.XlsRecordSupBook.LINK_TYPE_EXTERNAL)

    # NOTE(review): the XlsFile object is never explicitly closed here --
    # confirm whether xls_parser.XlsFile keeps a file handle open and, if so,
    # close it once iteration is done
    result = []
    for stream in xls_parser.XlsFile(filepath).get_streams():
        if not isinstance(stream, xls_parser.WorkbookStream):
            continue    # only workbook streams are inspected
        for record in stream.iter_records():
            if not isinstance(record, xls_parser.XlsRecordSupBook):
                continue    # only SupBook records are inspected
            if record.support_link_type in wanted_link_types:
                result.append(record.virt_path)
    return u'\n'.join(result)
| 531 | + | ||
| 532 | + | ||
| 515 | def process_docx(filepath, field_filter_mode=None): | 533 | def process_docx(filepath, field_filter_mode=None): |
| 516 | log.debug('process_docx') | 534 | log.debug('process_docx') |
| 517 | all_fields = [] | 535 | all_fields = [] |
| @@ -715,7 +733,10 @@ def process_xlsx(filepath, filed_filter_mode=None): | @@ -715,7 +733,10 @@ def process_xlsx(filepath, filed_filter_mode=None): | ||
| 715 | def process_file(filepath, field_filter_mode=None): | 733 | def process_file(filepath, field_filter_mode=None): |
| 716 | """ decides to either call process_docx or process_ole or process_xlsx """ | 734 | """ decides to either call process_docx or process_ole or process_xlsx """ |
| 717 | if olefile.isOleFile(filepath): | 735 | if olefile.isOleFile(filepath): |
| 718 | - return process_ole(filepath) | 736 | + if xls_parser.is_xls(filepath): |
| 737 | + return process_xls(filepath) | ||
| 738 | + else: | ||
| 739 | + return process_ole(filepath) | ||
| 719 | try: | 740 | try: |
| 720 | doctype = ooxml.get_type(filepath) | 741 | doctype = ooxml.get_type(filepath) |
| 721 | log.debug('Detected file type: {0}'.format(doctype)) | 742 | log.debug('Detected file type: {0}'.format(doctype)) |