diff --git a/oletools/msodde.py b/oletools/msodde.py index 69eac6c..0df0a8c 100644 --- a/oletools/msodde.py +++ b/oletools/msodde.py @@ -61,7 +61,9 @@ import olefile from oletools import ooxml from oletools import xls_parser from oletools import rtfobj +from oletools import oleid from oletools.common.log_helper import log_helper +from oletools.common.errors import FileIsEncryptedError # ----------------------------------------------------------------------------- # CHANGELOG: @@ -438,17 +440,18 @@ def process_doc_stream(stream): return result_parts -def process_doc(filepath): +def process_doc(ole): """ find dde links in word ole (.doc/.dot) file + Checks whether files is ppt and returns empty immediately in that case + (ppt files cannot contain DDE-links to my knowledge) + like process_xml, returns a concatenated unicode string of dde links or empty if none were found. dde-links will still begin with the dde[auto] key word (possibly after some whitespace) """ logger.debug('process_doc') - ole = olefile.OleFileIO(filepath, path_encoding=None) - links = [] for sid, direntry in enumerate(ole.direntries): is_orphan = direntry is None @@ -886,9 +889,20 @@ def process_file(filepath, field_filter_mode=None): if xls_parser.is_xls(filepath): logger.debug('Process file as excel 2003 (xls)') return process_xls(filepath) + + # encrypted files also look like ole, even if office 2007+ (xml-based) + # so check for encryption, first + ole = olefile.OleFileIO(filepath, path_encoding=None) + oid = oleid.OleID(ole) + if oid.check_encrypted().value: + log.debug('is encrypted - raise error') + raise FileIsEncryptedError(filepath) + elif oid.check_powerpoint().value: + log.debug('is ppt - cannot have DDE') + return u'' else: logger.debug('Process file as word 2003 (doc)') - return process_doc(filepath) + return process_doc(ole) with open(filepath, 'rb') as file_handle: if file_handle.read(4) == RTF_START: