Commit 5862969f6c8c8e541f701fe02280289e4f456672
1 parent
4f1e8a8d
msodde: process excel-2003 xml files
I am not sure whether this code generalizes to the newer XML format from Excel 2007+; I did not manage to create test data with DDE links. Maybe the 2007+ XML format does not support DDE links.
Showing 1 changed file with 33 additions and 0 deletions
oletools/msodde.py
| @@ -926,6 +926,36 @@ def process_csv_dialect(file_handle, delimiters): | @@ -926,6 +926,36 @@ def process_csv_dialect(file_handle, delimiters): | ||
| 926 | return results, dialect | 926 | return results, dialect |
| 927 | 927 | ||
| 928 | 928 | ||
| 929 | +#: format of dde formula in excel xml files | ||
| 930 | +XML_DDE_FORMAT = CSV_DDE_FORMAT | ||
| 931 | + | ||
| 932 | + | ||
| 933 | +def process_excel_xml(filepath): | ||
| 934 | + """ find dde links in xml files created with excel 2003 or excel 2007+ | ||
| 935 | + | ||
| 936 | + TODO: did not manage to create dde-link in the 2007+-xml-format. Find out | ||
| 937 | + whether this is possible at all. If so, extend this function | ||
| 938 | + """ | ||
| 939 | + dde_links = [] | ||
| 940 | + parser = ooxml.XmlParser(filepath) | ||
| 941 | + for _, elem, _ in parser.iter_xml(): | ||
| 942 | + tag = elem.tag.lower() | ||
| 943 | + if tag != 'cell' and not tag.endswith('}cell'): | ||
| 944 | + continue # we are only interested in cells | ||
| 945 | + formula = None | ||
| 946 | + for key in elem.keys(): | ||
| 947 | + if key.lower() == 'formula' or key.lower().endswith('}formula'): | ||
| 948 | + formula = elem.get(key) | ||
| 949 | + break | ||
| 950 | + if formula is None: | ||
| 951 | + continue | ||
| 952 | + log.debug('found cell with formula {0}'.format(formula)) | ||
| 953 | + match = re.match(XML_DDE_FORMAT, formula) | ||
| 954 | + if match: | ||
| 955 | + dde_links.append(u' '.join(match.groups()[:2])) | ||
| 956 | + return u'\n'.join(dde_links) | ||
| 957 | + | ||
| 958 | + | ||
| 929 | def process_file(filepath, field_filter_mode=None): | 959 | def process_file(filepath, field_filter_mode=None): |
| 930 | """ decides which of the process_* functions to call """ | 960 | """ decides which of the process_* functions to call """ |
| 931 | if olefile.isOleFile(filepath): | 961 | if olefile.isOleFile(filepath): |
| @@ -952,6 +982,9 @@ def process_file(filepath, field_filter_mode=None): | @@ -952,6 +982,9 @@ def process_file(filepath, field_filter_mode=None): | ||
| 952 | if doctype == ooxml.DOCTYPE_EXCEL: | 982 | if doctype == ooxml.DOCTYPE_EXCEL: |
| 953 | log.debug('Process file as excel 2007+ (xlsx)') | 983 | log.debug('Process file as excel 2007+ (xlsx)') |
| 954 | return process_xlsx(filepath) | 984 | return process_xlsx(filepath) |
| 985 | + elif doctype in (ooxml.DOCTYPE_EXCEL_XML, ooxml.DOCTYPE_EXCEL_XML2003): | ||
| 986 | + log.debug('Process file as xml from excel 2003/2007+') | ||
| 987 | + return process_excel_xml(filepath) | ||
| 955 | elif doctype is None: | 988 | elif doctype is None: |
| 956 | log.debug('Process file as csv') | 989 | log.debug('Process file as csv') |
| 957 | return process_csv(filepath) | 990 | return process_csv(filepath) |