Commit 8a2883660061b35ed3dd08ed0eb8f1f9a8e5378e

Authored by Christian Herdtweck
Committed by Philippe Lagadec
1 parent ed6b0de4

msodde: first integration of simple xls parsing

Showing 1 changed file with 23 additions and 2 deletions
oletools/msodde.py
... ... @@ -92,6 +92,7 @@ if not _parent_dir in sys.path:
92 92  
93 93 from oletools.thirdparty import olefile
94 94 import oletools.ooxml as ooxml
  95 +from oletools import xls_parser
95 96  
96 97 # === PYTHON 2+3 SUPPORT ======================================================
97 98  
... ... @@ -501,7 +502,7 @@ def process_ole(filepath):
501 502 find dde links in ole file
502 503  
503 504 like process_xml, returns a concatenated unicode string of dde links or
504   - empty if none were found. dde-links will still being with the dde[auto] key
  505 + empty if none were found. dde-links will still begin with the dde[auto] key
505 506 word (possibly after some whitespace)
506 507 """
507 508 log.debug('process_ole')
... ... @@ -512,6 +513,23 @@ def process_ole(filepath):
512 513 return u'\n'.join(text_parts)
513 514  
514 515  
def process_xls(filepath):
    """
    find dde links in excel ole file

    Walks all streams of the file that are a xls_parser.WorkbookStream and,
    within those, all records that are a xls_parser.XlsRecordSupBook. The
    virt_path of every such record whose support_link_type is either
    LINK_TYPE_OLE_DDE or LINK_TYPE_EXTERNAL is collected.

    Returns the collected paths as a single unicode string, one per line;
    the string is empty if no matching record was found.
    """
    # the two SupBook link types that are reported
    wanted_link_types = (xls_parser.XlsRecordSupBook.LINK_TYPE_OLE_DDE,
                         xls_parser.XlsRecordSupBook.LINK_TYPE_EXTERNAL)

    links = []
    for stream in xls_parser.XlsFile(filepath).get_streams():
        if isinstance(stream, xls_parser.WorkbookStream):
            # records are consumed lazily, in stream order
            links.extend(
                record.virt_path
                for record in stream.iter_records()
                if isinstance(record, xls_parser.XlsRecordSupBook)
                and record.support_link_type in wanted_link_types)
    return u'\n'.join(links)
  531 +
  532 +
515 533 def process_docx(filepath, field_filter_mode=None):
516 534 log.debug('process_docx')
517 535 all_fields = []
... ... @@ -715,7 +733,10 @@ def process_xlsx(filepath, filed_filter_mode=None):
715 733 def process_file(filepath, field_filter_mode=None):
716 734 """ decides to either call process_docx or process_ole or process_xlsx """
717 735 if olefile.isOleFile(filepath):
718   - return process_ole(filepath)
  736 + if xls_parser.is_xls(filepath):
  737 + return process_xls(filepath)
  738 + else:
  739 + return process_ole(filepath)
719 740 try:
720 741 doctype = ooxml.get_type(filepath)
721 742 log.debug('Detected file type: {0}'.format(doctype))
... ...