Commit f4bb562c19054835268d3ac85a75c1aa3362519f
1 parent: 986f132e
rtfobj: fixed issue #78, improved regex
Showing 2 changed files with 6 additions and 3 deletions
oletools/oleobj.py
| @@ -386,7 +386,7 @@ def process_file(container, filename, data, output_dir=None): | @@ -386,7 +386,7 @@ def process_file(container, filename, data, output_dir=None): | ||
| 386 | open(fname, 'wb').write(opkg.data) | 386 | open(fname, 'wb').write(opkg.data) |
| 387 | index += 1 | 387 | index += 1 |
| 388 | except: | 388 | except: |
| 389 | - log.info('*** Not an OLE 1.0 Object') | 389 | + log.debug('*** Not an OLE 1.0 Object') |
| 390 | 390 | ||
| 391 | 391 | ||
| 392 | 392 |
oletools/rtfobj.py
| @@ -62,6 +62,7 @@ http://www.decalage.info/python/oletools | @@ -62,6 +62,7 @@ http://www.decalage.info/python/oletools | ||
| 62 | # 2016-07-31 PL: - table output with tablestream | 62 | # 2016-07-31 PL: - table output with tablestream |
| 63 | # 2016-08-01 PL: - detect executable filenames in OLE Package | 63 | # 2016-08-01 PL: - detect executable filenames in OLE Package |
| 64 | # 2016-08-08 PL: - added option -s to save objects to files | 64 | # 2016-08-08 PL: - added option -s to save objects to files |
| 65 | +# 2016-08-09 PL: - fixed issue #78, improved regex | ||
| 65 | 66 | ||
| 66 | __version__ = '0.50' | 67 | __version__ = '0.50' |
| 67 | 68 | ||
| @@ -171,11 +172,13 @@ ASCII_NAME = b'([a-zA-Z]{1,250})' | @@ -171,11 +172,13 @@ ASCII_NAME = b'([a-zA-Z]{1,250})' | ||
| 171 | # SIGNED_INTEGER = r'(-?\d{1,250})' | 172 | # SIGNED_INTEGER = r'(-?\d{1,250})' |
| 172 | SIGNED_INTEGER = b'(-?\\d+)' | 173 | SIGNED_INTEGER = b'(-?\\d+)' |
| 173 | 174 | ||
| 174 | -CONTROL_WORD = b'(?:\\\\' + ASCII_NAME + b'(?:(?=[^a-zA-Z0-9-])|' + SIGNED_INTEGER + b'(?=[^0-9])))' | 175 | +# Note for issue #78: need to match "\A-" not followed by digits |
| 176 | +CONTROL_WORD = b'(?:\\\\' + ASCII_NAME + b'(?:' + SIGNED_INTEGER + b'(?=[^0-9])|(?=[^a-zA-Z0-9])))' | ||
| 175 | 177 | ||
| 176 | re_control_word = re.compile(CONTROL_WORD) | 178 | re_control_word = re.compile(CONTROL_WORD) |
| 177 | 179 | ||
| 178 | -CONTROL_SYMBOL = b'(?:\\\\[^a-zA-Z0-9])' | 180 | +# Note for issue #78: need to match "\" followed by digit (any non-alpha) |
| 181 | +CONTROL_SYMBOL = b'(?:\\\\[^a-zA-Z])' | ||
| 179 | re_control_symbol = re.compile(CONTROL_SYMBOL) | 182 | re_control_symbol = re.compile(CONTROL_SYMBOL) |
| 180 | 183 | ||
| 181 | # Text that is not a control word/symbol or a group: | 184 | # Text that is not a control word/symbol or a group: |