Commit 455c85b476cfc36d40355bb99d94076bd7c37f41
1 parent
209688eb
rtfobj: sanitize filenames to avoid special characters
Showing 1 changed file with 31 additions and 3 deletions
oletools/rtfobj.py
| @@ -50,6 +50,7 @@ http://www.decalage.info/python/oletools | @@ -50,6 +50,7 @@ http://www.decalage.info/python/oletools | ||
| 50 | # 2016-04-07 v0.45 PL: - improved parsing to handle some malware tricks | 50 | # 2016-04-07 v0.45 PL: - improved parsing to handle some malware tricks |
| 51 | # 2016-05-06 v0.47 TJ: - added option -d to set the output directory | 51 | # 2016-05-06 v0.47 TJ: - added option -d to set the output directory |
| 52 | # (contribution by Thomas Jarosch) | 52 | # (contribution by Thomas Jarosch) |
| 53 | +# TJ: - sanitize filenames to avoid special characters | ||
| 53 | 54 | ||
| 54 | __version__ = '0.47' | 55 | __version__ = '0.47' |
| 55 | 56 | ||
| @@ -282,15 +283,41 @@ def rtf_iter_objects (data, min_size=32): | @@ -282,15 +283,41 @@ def rtf_iter_objects (data, min_size=32): | ||
| 282 | match = re_hexblock.search(data, pos=current) | 283 | match = re_hexblock.search(data, pos=current) |
| 283 | 284 | ||
| 284 | 285 | ||
| 286 | + | ||
| 287 | +def sanitize_filename(filename, replacement='_', max_length=200): | ||
| 288 | + """compute basename of filename. Replaces all non-whitelisted characters. | ||
| 289 | + The returned filename is always a basename of the file.""" | ||
| 290 | + basepath = os.path.basename(filename).strip() | ||
| 291 | + sane_fname = re.sub(r'[^\w\.\- ]', replacement, basepath) | ||
| 292 | + | ||
| 293 | + while ".." in sane_fname: | ||
| 294 | + sane_fname = sane_fname.replace('..', '.') | ||
| 295 | + | ||
| 296 | + while "  " in sane_fname: | ||
| 297 | + sane_fname = sane_fname.replace('  ', ' ') | ||
| 298 | + | ||
| 299 | + if not len(filename): | ||
| 300 | + sane_fname = 'NONAME' | ||
| 301 | + | ||
| 302 | + # limit filename length | ||
| 303 | + if max_length: | ||
| 304 | + sane_fname = sane_fname[:max_length] | ||
| 305 | + | ||
| 306 | + return sane_fname | ||
| 307 | + | ||
| 308 | + | ||
| 285 | def process_file(container, filename, data, output_dir=None): | 309 | def process_file(container, filename, data, output_dir=None): |
| 286 | if output_dir: | 310 | if output_dir: |
| 287 | if not os.path.isdir(output_dir): | 311 | if not os.path.isdir(output_dir): |
| 288 | log.info('creating output directory %s' % output_dir) | 312 | log.info('creating output directory %s' % output_dir) |
| 289 | os.mkdir(output_dir) | 313 | os.mkdir(output_dir) |
| 290 | 314 | ||
| 291 | - fname_prefix = os.path.join(output_dir, os.path.basename(filename)) | 315 | + fname_prefix = os.path.join(output_dir, |
| 316 | + sanitize_filename(filename)) | ||
| 292 | else: | 317 | else: |
| 293 | - fname_prefix = filename | 318 | + base_dir = os.path.dirname(filename) |
| 319 | + sane_fname = sanitize_filename(filename) | ||
| 320 | + fname_prefix = os.path.join(base_dir, sane_fname) | ||
| 294 | 321 | ||
| 295 | # TODO: option to extract objects to files (false by default) | 322 | # TODO: option to extract objects to files (false by default) |
| 296 | if data is None: | 323 | if data is None: |
| @@ -330,7 +357,8 @@ def process_file(container, filename, data, output_dir=None): | @@ -330,7 +357,8 @@ def process_file(container, filename, data, output_dir=None): | ||
| 330 | print 'Source path = %r' % opkg.src_path | 357 | print 'Source path = %r' % opkg.src_path |
| 331 | print 'Temp path = %r' % opkg.temp_path | 358 | print 'Temp path = %r' % opkg.temp_path |
| 332 | if opkg.filename: | 359 | if opkg.filename: |
| 333 | - fname = '%s_%s' % (fname_prefix, opkg.filename) | 360 | + fname = '%s_%s' % (fname_prefix, |
| 361 | + sanitize_filename(opkg.filename)) | ||
| 334 | else: | 362 | else: |
| 335 | fname = '%s_object_%08X.noname' % (fname_prefix, index) | 363 | fname = '%s_object_%08X.noname' % (fname_prefix, index) |
| 336 | print 'saving to file %s' % fname | 364 | print 'saving to file %s' % fname |