Commit 8a2883660061b35ed3dd08ed0eb8f1f9a8e5378e

Authored by Christian Herdtweck
Committed by Philippe Lagadec
1 parent ed6b0de4

msodde: first integration of simple xls parsing

Showing 1 changed file with 23 additions and 2 deletions
oletools/msodde.py
@@ -92,6 +92,7 @@ if not _parent_dir in sys.path: @@ -92,6 +92,7 @@ if not _parent_dir in sys.path:
92 92
93 from oletools.thirdparty import olefile 93 from oletools.thirdparty import olefile
94 import oletools.ooxml as ooxml 94 import oletools.ooxml as ooxml
  95 +from oletools import xls_parser
95 96
96 # === PYTHON 2+3 SUPPORT ====================================================== 97 # === PYTHON 2+3 SUPPORT ======================================================
97 98
@@ -501,7 +502,7 @@ def process_ole(filepath): @@ -501,7 +502,7 @@ def process_ole(filepath):
501 find dde links in ole file 502 find dde links in ole file
502 503
503 like process_xml, returns a concatenated unicode string of dde links or 504 like process_xml, returns a concatenated unicode string of dde links or
504 - empty if none were found. dde-links will still being with the dde[auto] key 505 + empty if none were found. dde-links will still begin with the dde[auto] key
505 word (possibly after some whitespace) 506 word (possibly after some whitespace)
506 """ 507 """
507 log.debug('process_ole') 508 log.debug('process_ole')
@@ -512,6 +513,23 @@ def process_ole(filepath): @@ -512,6 +513,23 @@ def process_ole(filepath):
512 return u'\n'.join(text_parts) 513 return u'\n'.join(text_parts)
513 514
514 515
  516 +def process_xls(filepath):
  517 + """ find dde links in excel ole file """
  518 +
  519 + result = []
  520 + for stream in xls_parser.XlsFile(filepath).get_streams():
  521 + if not isinstance(stream, xls_parser.WorkbookStream):
  522 + continue
  523 + for record in stream.iter_records():
  524 + if not isinstance(record, xls_parser.XlsRecordSupBook):
  525 + continue
  526 + if record.support_link_type in (
  527 + xls_parser.XlsRecordSupBook.LINK_TYPE_OLE_DDE,
  528 + xls_parser.XlsRecordSupBook.LINK_TYPE_EXTERNAL):
  529 + result.append(record.virt_path)
  530 + return u'\n'.join(result)
  531 +
  532 +
515 def process_docx(filepath, field_filter_mode=None): 533 def process_docx(filepath, field_filter_mode=None):
516 log.debug('process_docx') 534 log.debug('process_docx')
517 all_fields = [] 535 all_fields = []
@@ -715,7 +733,10 @@ def process_xlsx(filepath, filed_filter_mode=None): @@ -715,7 +733,10 @@ def process_xlsx(filepath, filed_filter_mode=None):
715 def process_file(filepath, field_filter_mode=None): 733 def process_file(filepath, field_filter_mode=None):
716 """ decides to either call process_docx or process_ole or process_xlsx """ 734 """ decides to either call process_docx or process_ole or process_xlsx """
717 if olefile.isOleFile(filepath): 735 if olefile.isOleFile(filepath):
718 - return process_ole(filepath) 736 + if xls_parser.is_xls(filepath):
  737 + return process_xls(filepath)
  738 + else:
  739 + return process_ole(filepath)
719 try: 740 try:
720 doctype = ooxml.get_type(filepath) 741 doctype = ooxml.get_type(filepath)
721 log.debug('Detected file type: {0}'.format(doctype)) 742 log.debug('Detected file type: {0}'.format(doctype))