Commit f4bb562c19054835268d3ac85a75c1aa3362519f

Authored by decalage2
1 parent 986f132e

rtfobj: fixed issue #78, improved regex

oletools/oleobj.py
... ... @@ -386,7 +386,7 @@ def process_file(container, filename, data, output_dir=None):
386 386 open(fname, 'wb').write(opkg.data)
387 387 index += 1
388 388 except:
389   - log.info('*** Not an OLE 1.0 Object')
  389 + log.debug('*** Not an OLE 1.0 Object')
390 390  
391 391  
392 392  
... ...
oletools/rtfobj.py
... ... @@ -62,6 +62,7 @@ http://www.decalage.info/python/oletools
62 62 # 2016-07-31 PL: - table output with tablestream
63 63 # 2016-08-01 PL: - detect executable filenames in OLE Package
64 64 # 2016-08-08 PL: - added option -s to save objects to files
  65 +# 2016-08-09 PL: - fixed issue #78, improved regex
65 66  
66 67 __version__ = '0.50'
67 68  
... ... @@ -171,11 +172,13 @@ ASCII_NAME = b'([a-zA-Z]{1,250})'
171 172 # SIGNED_INTEGER = r'(-?\d{1,250})'
172 173 SIGNED_INTEGER = b'(-?\\d+)'
173 174  
174   -CONTROL_WORD = b'(?:\\\\' + ASCII_NAME + b'(?:(?=[^a-zA-Z0-9-])|' + SIGNED_INTEGER + b'(?=[^0-9])))'
  175 +# Note for issue #78: need to match "\A-" not followed by digits
  176 +CONTROL_WORD = b'(?:\\\\' + ASCII_NAME + b'(?:' + SIGNED_INTEGER + b'(?=[^0-9])|(?=[^a-zA-Z0-9])))'
175 177  
176 178 re_control_word = re.compile(CONTROL_WORD)
177 179  
178   -CONTROL_SYMBOL = b'(?:\\\\[^a-zA-Z0-9])'
  180 +# Note for issue #78: need to match "\" followed by digit (any non-alpha)
  181 +CONTROL_SYMBOL = b'(?:\\\\[^a-zA-Z])'
179 182 re_control_symbol = re.compile(CONTROL_SYMBOL)
180 183  
181 184 # Text that is not a control word/symbol or a group:
... ...