Commit 5862969f6c8c8e541f701fe02280289e4f456672
1 parent
4f1e8a8d
msodde: process excel-2003 xml files
Not sure whether this code generalizes to the newer XML format from Excel 2007+; I did not manage to create test data with DDE links in that format. Maybe the 2007+ XML format does not support DDE links at all.
Showing
1 changed file
with
33 additions
and
0 deletions
oletools/msodde.py
| ... | ... | @@ -926,6 +926,36 @@ def process_csv_dialect(file_handle, delimiters): |
| 926 | 926 | return results, dialect |
| 927 | 927 | |
| 928 | 928 | |
#: format of dde formula in excel xml files (the csv pattern is re-used as-is)
XML_DDE_FORMAT = CSV_DDE_FORMAT


def process_excel_xml(filepath):
    """ find dde links in xml files created with excel 2003 or excel 2007+

    Walks over the elements that `ooxml.XmlParser.iter_xml` yields for the
    file and only looks at cell elements, i.e. elements whose tag is `cell`
    (compared case-insensitively, with or without a `{namespace}` prefix).
    The value of the first `formula` attribute of such a cell (same name
    matching rules) is matched against `XML_DDE_FORMAT`.

    TODO: did not manage to create dde-link in the 2007+-xml-format. Find out
          whether this is possible at all. If so, extend this function

    :param filepath: path of the xml file; handed on to `ooxml.XmlParser`
    :returns: unicode text with one line per dde link found; each line is
              the first two groups of the regex match joined by one space.
              Empty text if no link was found.
    """
    dde_links = []
    parser = ooxml.XmlParser(filepath)
    for _, elem, _ in parser.iter_xml():
        tag = elem.tag.lower()
        if tag != 'cell' and not tag.endswith('}cell'):
            continue   # we are only interested in cells

        # find the first attribute called "formula", namespaced or not
        formula = None
        for key in elem.keys():
            name = key.lower()
            if name == 'formula' or name.endswith('}formula'):
                formula = elem.get(key)
                break
        if formula is None:
            continue

        # unicode template: with a byte-string template, python2 would try to
        # ascii-encode a unicode formula and raise UnicodeEncodeError as soon
        # as the formula contains a non-ascii character
        log.debug(u'found cell with formula {0}'.format(formula))
        match = re.match(XML_DDE_FORMAT, formula)
        if match:
            dde_links.append(u' '.join(match.groups()[:2]))
    return u'\n'.join(dde_links)
| 957 | + | |
| 958 | + | |
| 929 | 959 | def process_file(filepath, field_filter_mode=None): |
| 930 | 960 | """ decides which of the process_* functions to call """ |
| 931 | 961 | if olefile.isOleFile(filepath): |
| ... | ... | @@ -952,6 +982,9 @@ def process_file(filepath, field_filter_mode=None): |
| 952 | 982 | if doctype == ooxml.DOCTYPE_EXCEL: |
| 953 | 983 | log.debug('Process file as excel 2007+ (xlsx)') |
| 954 | 984 | return process_xlsx(filepath) |
| 985 | + elif doctype in (ooxml.DOCTYPE_EXCEL_XML, ooxml.DOCTYPE_EXCEL_XML2003): | |
| 986 | + log.debug('Process file as xml from excel 2003/2007+') | |
| 987 | + return process_excel_xml(filepath) | |
| 955 | 988 | elif doctype is None: |
| 956 | 989 | log.debug('Process file as csv') |
| 957 | 990 | return process_csv(filepath) | ... | ... |