Commit caed0b2ad17a5318201fc1bed96746c2ab626346
1 parent
40718f9d
added msodde.py
Showing
1 changed file
with
132 additions
and
0 deletions
oletools/msodde.py
0 → 100644
| 1 | +#!/usr/bin/env python | |
| 2 | +""" | |
| 3 | +msodde.py | |
| 4 | + | |
| 5 | +msodde is a script to parse MS Office documents | |
| 6 | +(e.g. Word, Excel), to detect and extract DDE links. | |
| 7 | + | |
| 8 | +Supported formats: | |
| 9 | +- Word 2007+ (.docx, .dotx, .docm, .dotm) | |
| 10 | + | |
| 11 | +Author: Philippe Lagadec - http://www.decalage.info | |
| 12 | +License: BSD, see source code or documentation | |
| 13 | + | |
| 14 | +msodde is part of the python-oletools package: | |
| 15 | +http://www.decalage.info/python/oletools | |
| 16 | +""" | |
| 17 | + | |
| 18 | +# === LICENSE ================================================================== | |
| 19 | + | |
| 20 | +# msodde is copyright (c) 2017 Philippe Lagadec (http://www.decalage.info) | |
| 21 | +# All rights reserved. | |
| 22 | +# | |
| 23 | +# Redistribution and use in source and binary forms, with or without modification, | |
| 24 | +# are permitted provided that the following conditions are met: | |
| 25 | +# | |
| 26 | +# * Redistributions of source code must retain the above copyright notice, this | |
| 27 | +# list of conditions and the following disclaimer. | |
| 28 | +# * Redistributions in binary form must reproduce the above copyright notice, | |
| 29 | +# this list of conditions and the following disclaimer in the documentation | |
| 30 | +# and/or other materials provided with the distribution. | |
| 31 | +# | |
| 32 | +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND | |
| 33 | +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | |
| 34 | +# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | |
| 35 | +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE | |
| 36 | +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
| 37 | +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |
| 38 | +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |
| 39 | +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |
| 40 | +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
| 41 | +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
| 42 | + | |
| 43 | +from __future__ import print_function | |
| 44 | + | |
| 45 | +#------------------------------------------------------------------------------ | |
| 46 | +# CHANGELOG: | |
| 47 | +# 2017-10-18 v0.52 PL: - first version | |
| 48 | + | |
# Version string of this tool; main() prints it in the startup banner.
__version__ = '0.52dev1'
| 50 | + | |
| 51 | +#------------------------------------------------------------------------------ | |
| 52 | +# TODO: detect beginning/end of fields, to separate each field | |
| 53 | +# TODO: test if DDE links can also appear in headers, footers and other places | |
| 54 | +# TODO: add xlsx support | |
| 55 | + | |
| 56 | +#------------------------------------------------------------------------------ | |
| 57 | +# REFERENCES: | |
| 58 | + | |
| 59 | + | |
| 60 | +#--- IMPORTS ------------------------------------------------------------------ | |
| 61 | + | |
# import lxml or ElementTree for XML parsing:
try:
    # lxml: best performance for XML processing
    import lxml.etree as ET
except ImportError:
    try:
        # C-accelerated ElementTree (Python 2 and Python 3 before 3.9)
        import xml.etree.cElementTree as ET
    except ImportError:
        # xml.etree.cElementTree was removed in Python 3.9; the plain
        # ElementTree module exposes the same API used by this script
        # (fromstring, Element.iter, Element.text).
        import xml.etree.ElementTree as ET
| 68 | + | |
| 69 | +import argparse | |
| 70 | +import zipfile | |
| 71 | +import os | |
| 72 | +import sys | |
| 73 | + | |
| 74 | + | |
| 75 | +# === CONSTANTS ============================================================== | |
| 76 | + | |
| 77 | + | |
# XML namespace URI used to qualify the 'w:' tags searched for below
NS_WORD = 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'

# Fully qualified name of the 'w:instrText' tag, in the
# '{namespace}localname' form expected by Element.iter()
TAG_W_INSTRTEXT = '{' + NS_WORD + '}instrText'
| 82 | + | |
| 83 | + | |
| 84 | +# === FUNCTIONS ============================================================== | |
| 85 | + | |
def process_args():
    """
    Parse the command line and validate the path of the file to analyze.

    The command line takes a single positional argument, ``filepath``.
    If that path does not exist, or exists but is not a regular file
    (e.g. a directory), a message is printed and the process exits with
    status 1.

    :return: the argparse namespace, with the attribute ``filepath``
    """
    parser = argparse.ArgumentParser(description='A python tool to detect and extract DDE links in MS Office files')
    parser.add_argument("filepath", help="path of the file to be analyzed")

    args = parser.parse_args()

    if not os.path.exists(args.filepath):
        print('File {} does not exist.'.format(args.filepath))
        sys.exit(1)

    # A directory passes the existence check but cannot be opened as a
    # document: reject it here instead of failing later with a traceback.
    if not os.path.isfile(args.filepath):
        print('{} is not a file.'.format(args.filepath))
        sys.exit(1)

    return args
| 97 | + | |
| 98 | + | |
| 99 | + | |
def process_file(filepath):
    """
    Extract the text of all 'w:instrText' elements from a Word 2007+ file.

    The file is opened as a zip archive and its 'word/document.xml' member
    is parsed as XML. The text of every 'w:instrText' element is
    concatenated in document order (each one is a chunk of a DDE link).

    :param filepath: path of the file to be analyzed
    :return: unicode string with the concatenated field text, empty if the
        document contains no 'w:instrText' element
    :raises KeyError: if the archive has no 'word/document.xml' member
        (raised by ZipFile.read)
    """
    # the context manager closes the archive even if read() raises
    with zipfile.ZipFile(filepath) as z:
        data = z.read('word/document.xml')
    # parse the XML data:
    # NOTE(review): the document may come from an untrusted source and is
    # parsed with the default parser settings - consider hardening (e.g.
    # disabling entity resolution when lxml is in use).
    root = ET.fromstring(data)
    # find all the tags 'w:instrText':
    # (each is a chunk of a DDE link)
    # elem.text is None for an empty element, so fall back to an empty
    # string instead of failing on the concatenation
    return u''.join(elem.text or u'' for elem in root.iter(TAG_W_INSTRTEXT))
| 113 | + | |
| 114 | + | |
| 115 | +#=== MAIN ================================================================= | |
| 116 | + | |
def main():
    """
    Command-line entry point: print the banner, then the DDE links found
    in the file given on the command line.
    """
    # banner with version, followed by an empty line
    banner = (
        'msodde %s - http://decalage.info/python/oletools' % __version__,
        'THIS IS WORK IN PROGRESS - Check updates regularly!',
        'Please report any issue at https://github.com/decalage2/oletools/issues',
        '',
    )
    for banner_line in banner:
        print(banner_line)

    options = process_args()
    target = options.filepath
    print('Opening file: %s' % target)
    dde_text = process_file(target)
    print('DDE Links:')
    print(dde_text)


if __name__ == '__main__':
    main()