Commit f4bb562c19054835268d3ac85a75c1aa3362519f
1 parent: 986f132e
rtfobj: fixed issue #78, improved regex
Showing 2 changed files with 6 additions and 3 deletions
oletools/oleobj.py
oletools/rtfobj.py
| ... | ... | @@ -62,6 +62,7 @@ http://www.decalage.info/python/oletools |
| 62 | 62 | # 2016-07-31 PL: - table output with tablestream |
| 63 | 63 | # 2016-08-01 PL: - detect executable filenames in OLE Package |
| 64 | 64 | # 2016-08-08 PL: - added option -s to save objects to files |
| 65 | +# 2016-08-09 PL: - fixed issue #78, improved regex | |
| 65 | 66 | |
| 66 | 67 | __version__ = '0.50' |
| 67 | 68 | |
| ... | ... | @@ -171,11 +172,13 @@ ASCII_NAME = b'([a-zA-Z]{1,250})' |
| 171 | 172 | # SIGNED_INTEGER = r'(-?\d{1,250})' |
| 172 | 173 | SIGNED_INTEGER = b'(-?\\d+)' |
| 173 | 174 | |
| 174 | -CONTROL_WORD = b'(?:\\\\' + ASCII_NAME + b'(?:(?=[^a-zA-Z0-9-])|' + SIGNED_INTEGER + b'(?=[^0-9])))' | |
| 175 | +# Note for issue #78: need to match "\A-" not followed by digits | |
| 176 | +CONTROL_WORD = b'(?:\\\\' + ASCII_NAME + b'(?:' + SIGNED_INTEGER + b'(?=[^0-9])|(?=[^a-zA-Z0-9])))' | |
| 175 | 177 | |
| 176 | 178 | re_control_word = re.compile(CONTROL_WORD) |
| 177 | 179 | |
| 178 | -CONTROL_SYMBOL = b'(?:\\\\[^a-zA-Z0-9])' | |
| 180 | +# Note for issue #78: need to match "\" followed by digit (any non-alpha) | |
| 181 | +CONTROL_SYMBOL = b'(?:\\\\[^a-zA-Z])' | |
| 179 | 182 | re_control_symbol = re.compile(CONTROL_SYMBOL) |
| 180 | 183 | |
| 181 | 184 | # Text that is not a control word/symbol or a group: | ... | ... |