Commit 42a369f9f75750ea9e16d7c7c47c3577343d82bd
1 parent
80d72312
oleobj: detect external relationships
By using the ooxml module, we can iterate through the XML files of a document and search its relationships for links to external targets.
Showing
1 changed file
with
44 additions
and
1 deletions
oletools/oleobj.py
| @@ -70,7 +70,7 @@ except ImportError: | @@ -70,7 +70,7 @@ except ImportError: | ||
| 70 | from oletools.thirdparty import xglob | 70 | from oletools.thirdparty import xglob |
| 71 | from oletools.ppt_record_parser import (is_ppt, PptFile, | 71 | from oletools.ppt_record_parser import (is_ppt, PptFile, |
| 72 | PptRecordExOleVbaActiveXAtom) | 72 | PptRecordExOleVbaActiveXAtom) |
| 73 | -from oletools.ooxml import ZipSubFile | 73 | +from oletools.ooxml import XmlParser, ZipSubFile |
| 74 | 74 | ||
| 75 | # ----------------------------------------------------------------------------- | 75 | # ----------------------------------------------------------------------------- |
| 76 | # CHANGELOG: | 76 | # CHANGELOG: |
| @@ -178,6 +178,7 @@ else: | @@ -178,6 +178,7 @@ else: | ||
| 178 | NULL_CHAR = 0 # pylint: disable=redefined-variable-type | 178 | NULL_CHAR = 0 # pylint: disable=redefined-variable-type |
| 179 | xrange = range # pylint: disable=redefined-builtin, invalid-name | 179 | xrange = range # pylint: disable=redefined-builtin, invalid-name |
| 180 | 180 | ||
| 181 | +OOXML_RELATIONSHIP_TAG = '{http://schemas.openxmlformats.org/package/2006/relationships}Relationship' | ||
| 181 | 182 | ||
| 182 | # === GLOBAL VARIABLES ======================================================== | 183 | # === GLOBAL VARIABLES ======================================================== |
| 183 | 184 | ||
| @@ -203,6 +204,24 @@ RETURN_ERR_ARGS = 2 # reserve for OptionParser.parse_args | @@ -203,6 +204,24 @@ RETURN_ERR_ARGS = 2 # reserve for OptionParser.parse_args | ||
| 203 | RETURN_ERR_STREAM = 4 # error opening/parsing a stream | 204 | RETURN_ERR_STREAM = 4 # error opening/parsing a stream |
| 204 | RETURN_ERR_DUMP = 8 # error dumping data from stream to file | 205 | RETURN_ERR_DUMP = 8 # error dumping data from stream to file |
| 205 | 206 | ||
# Relationship types (last path component of a Relationship's "Type"
# attribute) that are reported when the relationship target is external.
# Not sure if they can all be "External", but just in case
#
# NOTE: every entry needs its own trailing comma -- adjacent string literals
# without a comma are silently concatenated into a single (bogus) entry.
BLACKLISTED_RELATIONSHIP_TYPES = [
    'attachedTemplate',
    'externalLink',
    'externalLinkPath',
    'externalReference',
    'frame',
    'hyperlink',
    'officeDocument',
    'oleObject',
    'package',
    'slideUpdateUrl',
    'slideMaster',
    'slide',
    'slideUpdateInfo',
    'subDocument',
    'worksheet',
]
| 206 | 225 | ||
| 207 | # === FUNCTIONS =============================================================== | 226 | # === FUNCTIONS =============================================================== |
| 208 | 227 | ||
| @@ -671,6 +690,22 @@ def find_ole(filename, data): | @@ -671,6 +690,22 @@ def find_ole(filename, data): | ||
| 671 | ole.close() | 690 | ole.close() |
| 672 | 691 | ||
| 673 | 692 | ||
def find_external_relationships(xml_parser):
    """ iterate XML files looking for relationships to external objects

    Every element matching OOXML_RELATIONSHIP_TAG that `xml_parser.iter_xml`
    produces is inspected. An element is reported when its "TargetMode"
    attribute equals "External" and the last path component of its "Type"
    attribute is listed in BLACKLISTED_RELATIONSHIP_TYPES.

    Elements with missing or malformed attributes (no "TargetMode", "Type" or
    "Target", or a "Type" value without any "/") are skipped silently.

    :param xml_parser: object offering an `iter_xml` method that yields
                       3-tuples whose middle item is an XML element with an
                       `attrib` mapping (presumably an
                       oletools.ooxml.XmlParser -- see caller)
    :returns: generator of tuples (relationship_type, target)
    """
    for _, elem, _ in xml_parser.iter_xml(None, False, OOXML_RELATIONSHIP_TAG):
        try:
            if elem.attrib['TargetMode'] != 'External':
                continue
            # Type looks like ".../relationships/<type>"; keep only <type>
            relationship_type = elem.attrib['Type'].rsplit('/', 1)[1]
            target = elem.attrib['Target']
        except (AttributeError, KeyError, IndexError):
            # ignore missing/malformed attributes - Word won't detect
            # external links anyway. IndexError covers a "Type" value that
            # contains no "/" at all, which must not abort the whole scan.
            continue

        if relationship_type in BLACKLISTED_RELATIONSHIP_TYPES:
            yield relationship_type, target
| 707 | + | ||
| 708 | + | ||
| 674 | def process_file(filename, data, output_dir=None): | 709 | def process_file(filename, data, output_dir=None): |
| 675 | """ find embedded objects in given file | 710 | """ find embedded objects in given file |
| 676 | 711 | ||
| @@ -703,6 +738,14 @@ def process_file(filename, data, output_dir=None): | @@ -703,6 +738,14 @@ def process_file(filename, data, output_dir=None): | ||
| 703 | err_dumping = False | 738 | err_dumping = False |
| 704 | did_dump = False | 739 | did_dump = False |
| 705 | 740 | ||
| 741 | + if is_zipfile(filename): | ||
| 742 | + log.info('file is a OOXML file, looking for relationships with external files') | ||
| 743 | + xml_parser = XmlParser(filename) | ||
| 744 | + for relationship, target in find_external_relationships(xml_parser): | ||
| 745 | + did_dump = True | ||
| 746 | + print("Found relationship '%s' with external file %s" % (relationship, target)) | ||
| 747 | + | ||
| 748 | + | ||
| 706 | # look for ole files inside file (e.g. unzip docx) | 749 | # look for ole files inside file (e.g. unzip docx) |
| 707 | # have to finish work on every ole stream inside iteration, since handles | 750 | # have to finish work on every ole stream inside iteration, since handles |
| 708 | # are closed in find_ole | 751 | # are closed in find_ole |