From f4bb562c19054835268d3ac85a75c1aa3362519f Mon Sep 17 00:00:00 2001 From: decalage2 Date: Tue, 9 Aug 2016 17:59:39 +0200 Subject: [PATCH] rtfobj: fixed issue #78, improved regex --- oletools/oleobj.py | 2 +- oletools/rtfobj.py | 7 +++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/oletools/oleobj.py b/oletools/oleobj.py index 6ace467..876d363 100755 --- a/oletools/oleobj.py +++ b/oletools/oleobj.py @@ -386,7 +386,7 @@ def process_file(container, filename, data, output_dir=None): open(fname, 'wb').write(opkg.data) index += 1 except: - log.info('*** Not an OLE 1.0 Object') + log.debug('*** Not an OLE 1.0 Object') diff --git a/oletools/rtfobj.py b/oletools/rtfobj.py index 8e802c3..76888f9 100755 --- a/oletools/rtfobj.py +++ b/oletools/rtfobj.py @@ -62,6 +62,7 @@ http://www.decalage.info/python/oletools # 2016-07-31 PL: - table output with tablestream # 2016-08-01 PL: - detect executable filenames in OLE Package # 2016-08-08 PL: - added option -s to save objects to files +# 2016-08-09 PL: - fixed issue #78, improved regex __version__ = '0.50' @@ -171,11 +172,13 @@ ASCII_NAME = b'([a-zA-Z]{1,250})' # SIGNED_INTEGER = r'(-?\d{1,250})' SIGNED_INTEGER = b'(-?\\d+)' -CONTROL_WORD = b'(?:\\\\' + ASCII_NAME + b'(?:(?=[^a-zA-Z0-9-])|' + SIGNED_INTEGER + b'(?=[^0-9])))' +# Note for issue #78: need to match "\A-" not followed by digits +CONTROL_WORD = b'(?:\\\\' + ASCII_NAME + b'(?:' + SIGNED_INTEGER + b'(?=[^0-9])|(?=[^a-zA-Z0-9])))' re_control_word = re.compile(CONTROL_WORD) -CONTROL_SYMBOL = b'(?:\\\\[^a-zA-Z0-9])' +# Note for issue #78: need to match "\" followed by digit (any non-alpha) +CONTROL_SYMBOL = b'(?:\\\\[^a-zA-Z])' re_control_symbol = re.compile(CONTROL_SYMBOL) # Text that is not a control word/symbol or a group: -- libgit2 0.21.4