Commit 42a369f9f75750ea9e16d7c7c47c3577343d82bd
1 parent
80d72312
oleobj: detect external relationships
Using the ooxml module, we can iterate through the XML files of a document and search its relationships for external links.
Showing
1 changed file
with
44 additions
and
1 deletions
oletools/oleobj.py
| ... | ... | @@ -70,7 +70,7 @@ except ImportError: |
| 70 | 70 | from oletools.thirdparty import xglob |
| 71 | 71 | from oletools.ppt_record_parser import (is_ppt, PptFile, |
| 72 | 72 | PptRecordExOleVbaActiveXAtom) |
| 73 | -from oletools.ooxml import ZipSubFile | |
| 73 | +from oletools.ooxml import XmlParser, ZipSubFile | |
| 74 | 74 | |
| 75 | 75 | # ----------------------------------------------------------------------------- |
| 76 | 76 | # CHANGELOG: |
| ... | ... | @@ -178,6 +178,7 @@ else: |
| 178 | 178 | NULL_CHAR = 0 # pylint: disable=redefined-variable-type |
| 179 | 179 | xrange = range # pylint: disable=redefined-builtin, invalid-name |
| 180 | 180 | |
# Namespace-qualified tag name ("{namespace}Relationship") of the elements
# that find_external_relationships() asks the XML parser to look for.
OOXML_RELATIONSHIP_TAG = '{http://schemas.openxmlformats.org/package/2006/relationships}Relationship'
| 181 | 182 | |
| 182 | 183 | # === GLOBAL VARIABLES ======================================================== |
| 183 | 184 | |
| ... | ... | @@ -203,6 +204,24 @@ RETURN_ERR_ARGS = 2 # reserve for OptionParser.parse_args |
| 203 | 204 | RETURN_ERR_STREAM = 4 # error opening/parsing a stream |
| 204 | 205 | RETURN_ERR_DUMP = 8 # error dumping data from stream to file |
| 205 | 206 | |
# Relationship types (the last path segment of a Relationship's "Type" URI)
# that are reported when the relationship has TargetMode="External".
# It is not certain that every one of these can actually be external; they
# are listed just in case.
# NOTE: every entry needs its own trailing comma -- adjacent string literals
# without a comma are silently concatenated into a single (bogus) entry.
BLACKLISTED_RELATIONSHIP_TYPES = [
    'attachedTemplate',
    'externalLink',
    'externalLinkPath',
    'externalReference',
    'frame',
    'hyperlink',
    'officeDocument',
    'oleObject',
    'package',
    'slideUpdateUrl',
    'slideMaster',
    'slide',
    'slideUpdateInfo',
    'subDocument',
    'worksheet',
]
| 206 | 225 | |
| 207 | 226 | # === FUNCTIONS =============================================================== |
| 208 | 227 | |
| ... | ... | @@ -671,6 +690,22 @@ def find_ole(filename, data): |
| 671 | 690 | ole.close() |
| 672 | 691 | |
| 673 | 692 | |
def find_external_relationships(xml_parser):
    """ iterate XML files looking for relationships to external objects

    Generator. Asks `xml_parser` for all elements tagged
    OOXML_RELATIONSHIP_TAG and yields a tuple
    `(relationship_type, target)` for each one that

    * has attribute TargetMode equal to 'External', and
    * whose Type attribute ends (after the last '/') in one of
      BLACKLISTED_RELATIONSHIP_TYPES.

    `relationship_type` is that last segment of the Type attribute,
    `target` is the value of the Target attribute.

    :param xml_parser: object offering `iter_xml(...)` that yields 3-tuples
                       with the element in the middle position (presumably
                       an `oletools.ooxml.XmlParser` -- the exact meaning of
                       the first two arguments passed to `iter_xml` is not
                       visible here, TODO confirm)
    """
    for _, elem, _ in xml_parser.iter_xml(None, False, OOXML_RELATIONSHIP_TAG):
        try:
            attributes = elem.attrib
            if attributes['TargetMode'] != 'External':
                continue

            # [-1] instead of [1]: a Type without any '/' is then compared
            # as a whole instead of raising an uncaught IndexError that
            # would abort the scan of all remaining relationships
            relationship_type = attributes['Type'].rsplit('/', 1)[-1]

            if relationship_type in BLACKLISTED_RELATIONSHIP_TYPES:
                yield relationship_type, attributes['Target']
        except (AttributeError, KeyError):
            # ignore missing attributes - Word won't detect
            # external links anyway
            pass
| 707 | + | |
| 708 | + | |
| 674 | 709 | def process_file(filename, data, output_dir=None): |
| 675 | 710 | """ find embedded objects in given file |
| 676 | 711 | |
| ... | ... | @@ -703,6 +738,14 @@ def process_file(filename, data, output_dir=None): |
| 703 | 738 | err_dumping = False |
| 704 | 739 | did_dump = False |
| 705 | 740 | |
| 741 | + if is_zipfile(filename): | |
| 742 | + log.info('file is a OOXML file, looking for relationships with external files') | |
| 743 | + xml_parser = XmlParser(filename) | |
| 744 | + for relationship, target in find_external_relationships(xml_parser): | |
| 745 | + did_dump = True | |
| 746 | + print("Found relationship '%s' with external file %s" % (relationship, target)) | |
| 747 | + | |
| 748 | + | |
| 706 | 749 | # look for ole files inside file (e.g. unzip docx) |
| 707 | 750 | # have to finish work on every ole stream inside iteration, since handles |
| 708 | 751 | # are closed in find_ole | ... | ... |