Commit 8a2883660061b35ed3dd08ed0eb8f1f9a8e5378e
Committed by Philippe Lagadec
1 parent: ed6b0de4
msodde: first integration of simple xls parsing
Showing 1 changed file with 23 additions and 2 deletions
oletools/msodde.py
| ... | ... | @@ -92,6 +92,7 @@ if not _parent_dir in sys.path: |
| 92 | 92 | |
| 93 | 93 | from oletools.thirdparty import olefile |
| 94 | 94 | import oletools.ooxml as ooxml |
| 95 | +from oletools import xls_parser | |
| 95 | 96 | |
| 96 | 97 | # === PYTHON 2+3 SUPPORT ====================================================== |
| 97 | 98 | |
| ... | ... | @@ -501,7 +502,7 @@ def process_ole(filepath): |
| 501 | 502 | find dde links in ole file |
| 502 | 503 | |
| 503 | 504 | like process_xml, returns a concatenated unicode string of dde links or |
| 504 | - empty if none were found. dde-links will still being with the dde[auto] key | |
| 505 | + empty if none were found. dde-links will still begin with the dde[auto] key | |
| 505 | 506 | word (possibly after some whitespace) |
| 506 | 507 | """ |
| 507 | 508 | log.debug('process_ole') |
| ... | ... | @@ -512,6 +513,23 @@ def process_ole(filepath): |
| 512 | 513 | return u'\n'.join(text_parts) |
| 513 | 514 | |
| 514 | 515 | |
| 516 | +def process_xls(filepath): | |
| 517 | + """ find dde links in excel ole file """ | |
| 518 | + | |
| 519 | + result = [] | |
| 520 | + for stream in xls_parser.XlsFile(filepath).get_streams(): | |
| 521 | + if not isinstance(stream, xls_parser.WorkbookStream): | |
| 522 | + continue | |
| 523 | + for record in stream.iter_records(): | |
| 524 | + if not isinstance(record, xls_parser.XlsRecordSupBook): | |
| 525 | + continue | |
| 526 | + if record.support_link_type in ( | |
| 527 | + xls_parser.XlsRecordSupBook.LINK_TYPE_OLE_DDE, | |
| 528 | + xls_parser.XlsRecordSupBook.LINK_TYPE_EXTERNAL): | |
| 529 | + result.append(record.virt_path) | |
| 530 | + return u'\n'.join(result) | |
| 531 | + | |
| 532 | + | |
| 515 | 533 | def process_docx(filepath, field_filter_mode=None): |
| 516 | 534 | log.debug('process_docx') |
| 517 | 535 | all_fields = [] |
| ... | ... | @@ -715,7 +733,10 @@ def process_xlsx(filepath, filed_filter_mode=None): |
| 715 | 733 | def process_file(filepath, field_filter_mode=None): |
| 716 | 734 | """ decides to either call process_docx or process_ole or process_xlsx """ |
| 717 | 735 | if olefile.isOleFile(filepath): |
| 718 | - return process_ole(filepath) | |
| 736 | + if xls_parser.is_xls(filepath): | |
| 737 | + return process_xls(filepath) | |
| 738 | + else: | |
| 739 | + return process_ole(filepath) | |
| 719 | 740 | try: |
| 720 | 741 | doctype = ooxml.get_type(filepath) |
| 721 | 742 | log.debug('Detected file type: {0}'.format(doctype)) |
| ... | ... |