Commit 5862969f6c8c8e541f701fe02280289e4f456672

Authored by Christian Herdtweck
1 parent 4f1e8a8d

msodde: process excel-2003 xml files

I am not sure whether this code generalizes to the newer xml format of
excel 2007+: I did not manage to create test data with DDE links in that
format. Maybe the 2007+ xml format does not support DDE links at all.
Showing 1 changed file with 33 additions and 0 deletions
oletools/msodde.py
@@ -926,6 +926,36 @@ def process_csv_dialect(file_handle, delimiters): @@ -926,6 +926,36 @@ def process_csv_dialect(file_handle, delimiters):
926 return results, dialect 926 return results, dialect
927 927
928 928
#: format of dde formula in excel xml files; this is currently just an alias
#: for the pattern used for csv files (CSV_DDE_FORMAT)
XML_DDE_FORMAT = CSV_DDE_FORMAT
  931 +
  932 +
def process_excel_xml(filepath):
    """ find dde links in xml files created with excel 2003 or excel 2007+

    Walks over all elements yielded by ooxml.XmlParser.iter_xml, looks at
    every "cell" element (with or without a "{namespace}" prefix in its tag)
    that has a "formula" attribute (again with or without namespace prefix)
    and matches that formula against XML_DDE_FORMAT.

    TODO: did not manage to create dde-link in the 2007+-xml-format. Find out
          whether this is possible at all. If so, extend this function

    :param filepath: path of the xml file; handed unchanged to ooxml.XmlParser
    :returns: unicode text with one line per dde link found; each line is made
              of the first two groups of the XML_DDE_FORMAT match, joined by a
              space. Empty text if no link was found.
    """
    dde_links = []
    parser = ooxml.XmlParser(filepath)
    for _, elem, _ in parser.iter_xml():
        tag = elem.tag.lower()
        if tag != 'cell' and not tag.endswith('}cell'):
            continue   # we are only interested in cells

        # find the formula attribute; only the first matching key is used
        formula = None
        for key in elem.keys():
            lower_key = key.lower()
            if lower_key == 'formula' or lower_key.endswith('}formula'):
                formula = elem.get(key)
                break
        if formula is None:
            continue

        # The template must be unicode: on python 2 a byte-string template
        # would try to ascii-encode a unicode formula and raise
        # UnicodeEncodeError as soon as the formula has a non-ascii character
        log.debug(u'found cell with formula {0}'.format(formula))
        match = re.match(XML_DDE_FORMAT, formula)
        if match:
            dde_links.append(u' '.join(match.groups()[:2]))
    return u'\n'.join(dde_links)
  957 +
  958 +
929 def process_file(filepath, field_filter_mode=None): 959 def process_file(filepath, field_filter_mode=None):
930 """ decides which of the process_* functions to call """ 960 """ decides which of the process_* functions to call """
931 if olefile.isOleFile(filepath): 961 if olefile.isOleFile(filepath):
@@ -952,6 +982,9 @@ def process_file(filepath, field_filter_mode=None): @@ -952,6 +982,9 @@ def process_file(filepath, field_filter_mode=None):
952 if doctype == ooxml.DOCTYPE_EXCEL: 982 if doctype == ooxml.DOCTYPE_EXCEL:
953 log.debug('Process file as excel 2007+ (xlsx)') 983 log.debug('Process file as excel 2007+ (xlsx)')
954 return process_xlsx(filepath) 984 return process_xlsx(filepath)
  985 + elif doctype in (ooxml.DOCTYPE_EXCEL_XML, ooxml.DOCTYPE_EXCEL_XML2003):
  986 + log.debug('Process file as xml from excel 2003/2007+')
  987 + return process_excel_xml(filepath)
955 elif doctype is None: 988 elif doctype is None:
956 log.debug('Process file as csv') 989 log.debug('Process file as csv')
957 return process_csv(filepath) 990 return process_csv(filepath)