Commit caed0b2ad17a5318201fc1bed96746c2ab626346
1 parent
40718f9d
added msodde.py
Showing
1 changed file
with
132 additions
and
0 deletions
oletools/msodde.py
0 → 100644
| 1 | +#!/usr/bin/env python | ||
| 2 | +""" | ||
| 3 | +msodde.py | ||
| 4 | + | ||
| 5 | +msodde is a script to parse MS Office documents | ||
| 6 | +(e.g. Word, Excel), to detect and extract DDE links. | ||
| 7 | + | ||
| 8 | +Supported formats: | ||
| 9 | +- Word 2007+ (.docx, .dotx, .docm, .dotm) | ||
| 10 | + | ||
| 11 | +Author: Philippe Lagadec - http://www.decalage.info | ||
| 12 | +License: BSD, see source code or documentation | ||
| 13 | + | ||
| 14 | +msodde is part of the python-oletools package: | ||
| 15 | +http://www.decalage.info/python/oletools | ||
| 16 | +""" | ||
| 17 | + | ||
| 18 | +# === LICENSE ================================================================== | ||
| 19 | + | ||
| 20 | +# msodde is copyright (c) 2017 Philippe Lagadec (http://www.decalage.info) | ||
| 21 | +# All rights reserved. | ||
| 22 | +# | ||
| 23 | +# Redistribution and use in source and binary forms, with or without modification, | ||
| 24 | +# are permitted provided that the following conditions are met: | ||
| 25 | +# | ||
| 26 | +# * Redistributions of source code must retain the above copyright notice, this | ||
| 27 | +# list of conditions and the following disclaimer. | ||
| 28 | +# * Redistributions in binary form must reproduce the above copyright notice, | ||
| 29 | +# this list of conditions and the following disclaimer in the documentation | ||
| 30 | +# and/or other materials provided with the distribution. | ||
| 31 | +# | ||
| 32 | +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND | ||
| 33 | +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | ||
| 34 | +# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | ||
| 35 | +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE | ||
| 36 | +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | ||
| 37 | +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | ||
| 38 | +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | ||
| 39 | +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | ||
| 40 | +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
| 41 | +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
| 42 | + | ||
| 43 | +from __future__ import print_function | ||
| 44 | + | ||
| 45 | +#------------------------------------------------------------------------------ | ||
| 46 | +# CHANGELOG: | ||
| 47 | +# 2017-10-18 v0.52 PL: - first version | ||
| 48 | + | ||
| 49 | +__version__ = '0.52dev1' | ||
| 50 | + | ||
| 51 | +#------------------------------------------------------------------------------ | ||
| 52 | +# TODO: detect beginning/end of fields, to separate each field | ||
| 53 | +# TODO: test if DDE links can also appear in headers, footers and other places | ||
| 54 | +# TODO: add xlsx support | ||
| 55 | + | ||
| 56 | +#------------------------------------------------------------------------------ | ||
| 57 | +# REFERENCES: | ||
| 58 | + | ||
| 59 | + | ||
| 60 | +#--- IMPORTS ------------------------------------------------------------------ | ||
| 61 | + | ||
| 62 | +# import lxml or ElementTree for XML parsing: | ||
try:
    # lxml: best performance for XML processing
    import lxml.etree as ET
except ImportError:
    try:
        # C-accelerated ElementTree, available on Python 2.x and on
        # Python 3 before 3.9
        import xml.etree.cElementTree as ET
    except ImportError:
        # xml.etree.cElementTree was removed in Python 3.9; the plain
        # ElementTree module picks up the C accelerator by itself there
        import xml.etree.ElementTree as ET
| 68 | + | ||
| 69 | +import argparse | ||
| 70 | +import zipfile | ||
| 71 | +import os | ||
| 72 | +import sys | ||
| 73 | + | ||
| 74 | + | ||
| 75 | +# === CONSTANTS ============================================================== | ||
| 76 | + | ||
| 77 | + | ||
# XML namespace URI used for the Word elements looked up below
NS_WORD = 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'

# Namespace-qualified tag name ('{namespace}localname') for 'w:instrText'
TAG_W_INSTRTEXT = '{{{0}}}instrText'.format(NS_WORD)
| 82 | + | ||
| 83 | + | ||
| 84 | +# === FUNCTIONS ============================================================== | ||
| 85 | + | ||
def process_args(cmd_line_args=None):
    """
    Parse the command-line arguments and check that the target file exists.

    :param cmd_line_args: optional list of argument strings to parse instead
        of sys.argv[1:]. None (the default) keeps the original behaviour of
        reading the arguments of the running process.
    :return: argparse.Namespace with a 'filepath' attribute
    :raises SystemExit: with status 1 when filepath does not exist
        (argparse itself also exits on malformed arguments)
    """
    parser = argparse.ArgumentParser(
        description='A python tool to detect and extract DDE links in MS Office files')
    parser.add_argument("filepath", help="path of the file to be analyzed")

    # parse_args(None) falls back to sys.argv[1:], so existing callers that
    # pass nothing are unaffected
    args = parser.parse_args(cmd_line_args)

    if not os.path.exists(args.filepath):
        # diagnostics go to stderr so they never mix with the extracted
        # links printed on stdout
        sys.stderr.write('File {} does not exist.\n'.format(args.filepath))
        sys.exit(1)

    return args
| 97 | + | ||
| 98 | + | ||
| 99 | + | ||
def process_file(filepath):
    """
    Extract the field instruction text (potential DDE links) from a
    Word 2007+ file.

    Reads the member 'word/document.xml' from the zip container, parses it
    and concatenates the text of every 'w:instrText' element, in the order
    in which the elements are found.

    :param filepath: path of the file to be analyzed
    :return: unicode string with the concatenated text of all 'w:instrText'
        elements (empty string when there is none)
    :raises KeyError: when the archive has no 'word/document.xml' member
        (errors from zipfile and from the XML parser also propagate)
    """
    # context manager: the archive is closed even when read() raises
    with zipfile.ZipFile(filepath) as z:
        data = z.read('word/document.xml')
    # parse the XML data:
    # NOTE(review): analyzed documents are untrusted input - confirm that the
    # XML parser in use is configured safely (entity expansion, external DTDs)
    root = ET.fromstring(data)
    # find all the tags 'w:instrText' (each is a chunk of a DDE link) and
    # concatenate their text; an empty element has text None, which is
    # treated as an empty chunk instead of raising TypeError:
    return u''.join(elem.text or u'' for elem in root.iter(TAG_W_INSTRTEXT))
| 113 | + | ||
| 114 | + | ||
| 115 | +#=== MAIN ================================================================= | ||
| 116 | + | ||
def main():
    """
    Command-line entry point: print the banner, parse the arguments, then
    print the DDE link text extracted from the given file.
    """
    # banner with version, followed by an empty separator line
    banner = (
        'msodde %s - http://decalage.info/python/oletools' % __version__,
        'THIS IS WORK IN PROGRESS - Check updates regularly!',
        'Please report any issue at https://github.com/decalage2/oletools/issues',
        '',
    )
    for banner_line in banner:
        print(banner_line)

    options = process_args()
    target = options.filepath
    print('Opening file: %s' % target)
    dde_text = process_file(target)
    print('DDE Links:')
    print(dde_text)


if __name__ == '__main__':
    main()