Commit f4bb562c19054835268d3ac85a75c1aa3362519f

Authored by decalage2
1 parent 986f132e

rtfobj: fixed issue #78, improved regex

oletools/oleobj.py
@@ -386,7 +386,7 @@ def process_file(container, filename, data, output_dir=None): @@ -386,7 +386,7 @@ def process_file(container, filename, data, output_dir=None):
386 open(fname, 'wb').write(opkg.data) 386 open(fname, 'wb').write(opkg.data)
387 index += 1 387 index += 1
388 except: 388 except:
389 - log.info('*** Not an OLE 1.0 Object') 389 + log.debug('*** Not an OLE 1.0 Object')
390 390
391 391
392 392
oletools/rtfobj.py
@@ -62,6 +62,7 @@ http://www.decalage.info/python/oletools @@ -62,6 +62,7 @@ http://www.decalage.info/python/oletools
62 # 2016-07-31 PL: - table output with tablestream 62 # 2016-07-31 PL: - table output with tablestream
63 # 2016-08-01 PL: - detect executable filenames in OLE Package 63 # 2016-08-01 PL: - detect executable filenames in OLE Package
64 # 2016-08-08 PL: - added option -s to save objects to files 64 # 2016-08-08 PL: - added option -s to save objects to files
  65 +# 2016-08-09 PL: - fixed issue #78, improved regex
65 66
66 __version__ = '0.50' 67 __version__ = '0.50'
67 68
@@ -171,11 +172,13 @@ ASCII_NAME = b'([a-zA-Z]{1,250})' @@ -171,11 +172,13 @@ ASCII_NAME = b'([a-zA-Z]{1,250})'
171 # SIGNED_INTEGER = r'(-?\d{1,250})' 172 # SIGNED_INTEGER = r'(-?\d{1,250})'
172 SIGNED_INTEGER = b'(-?\\d+)' 173 SIGNED_INTEGER = b'(-?\\d+)'
173 174
174 -CONTROL_WORD = b'(?:\\\\' + ASCII_NAME + b'(?:(?=[^a-zA-Z0-9-])|' + SIGNED_INTEGER + b'(?=[^0-9])))' 175 +# Note for issue #78: need to match "\A-" not followed by digits
  176 +CONTROL_WORD = b'(?:\\\\' + ASCII_NAME + b'(?:' + SIGNED_INTEGER + b'(?=[^0-9])|(?=[^a-zA-Z0-9])))'
175 177
176 re_control_word = re.compile(CONTROL_WORD) 178 re_control_word = re.compile(CONTROL_WORD)
177 179
178 -CONTROL_SYMBOL = b'(?:\\\\[^a-zA-Z0-9])' 180 +# Note for issue #78: need to match "\" followed by digit (any non-alpha)
  181 +CONTROL_SYMBOL = b'(?:\\\\[^a-zA-Z])'
179 re_control_symbol = re.compile(CONTROL_SYMBOL) 182 re_control_symbol = re.compile(CONTROL_SYMBOL)
180 183
181 # Text that is not a control word/symbol or a group: 184 # Text that is not a control word/symbol or a group: