Commit 455c85b476cfc36d40355bb99d94076bd7c37f41
1 parent
209688eb
rtfobj: sanitize filenames to avoid special characters
Showing 1 changed file with 31 additions and 3 deletions
oletools/rtfobj.py
| ... | ... | @@ -50,6 +50,7 @@ http://www.decalage.info/python/oletools |
| 50 | 50 | # 2016-04-07 v0.45 PL: - improved parsing to handle some malware tricks |
| 51 | 51 | # 2016-05-06 v0.47 TJ: - added option -d to set the output directory |
| 52 | 52 | # (contribution by Thomas Jarosch) |
| 53 | +# TJ: - sanitize filenames to avoid special characters | |
| 53 | 54 | |
| 54 | 55 | __version__ = '0.47' |
| 55 | 56 | |
| ... | ... | @@ -282,15 +283,41 @@ def rtf_iter_objects (data, min_size=32): |
| 282 | 283 | match = re_hexblock.search(data, pos=current) |
| 283 | 284 | |
| 284 | 285 | |
| 286 | + | |
def sanitize_filename(filename, replacement='_', max_length=200):
    """Return a sanitized basename for filename.

    Only the last path component of filename is kept, so the result never
    contains a directory part. Every character that is not a word
    character, a dot, a dash or a space is replaced by `replacement`.
    Runs of dots and runs of spaces are collapsed to a single character.

    :param filename: str, file name or path to sanitize
    :param replacement: str, substituted for each non-whitelisted character
    :param max_length: int, maximum length of the result; a false value
        (0 or None) disables the limit
    :return: str, sanitized basename, or 'NONAME' (subject to max_length)
        when nothing is left after sanitizing
    """
    # keep only the last path component, without surrounding whitespace
    basepath = os.path.basename(filename).strip()
    sane_fname = re.sub(r'[^\w\.\- ]', replacement, basepath)

    # collapse repeated dots (no '..' may survive) and repeated spaces
    sane_fname = re.sub(r'\.{2,}', '.', sane_fname)
    sane_fname = re.sub(r' {2,}', ' ', sane_fname)

    # Check the sanitized result rather than the raw input: a path such as
    # 'dir/' or a whitespace-only name is non-empty but has an empty basename.
    if not sane_fname:
        sane_fname = 'NONAME'

    # limit filename length
    if max_length:
        sane_fname = sane_fname[:max_length]

    return sane_fname
| 307 | + | |
| 308 | + | |
| 285 | 309 | def process_file(container, filename, data, output_dir=None): |
| 286 | 310 | if output_dir: |
| 287 | 311 | if not os.path.isdir(output_dir): |
| 288 | 312 | log.info('creating output directory %s' % output_dir) |
| 289 | 313 | os.mkdir(output_dir) |
| 290 | 314 | |
| 291 | - fname_prefix = os.path.join(output_dir, os.path.basename(filename)) | |
| 315 | + fname_prefix = os.path.join(output_dir, | |
| 316 | + sanitize_filename(filename)) | |
| 292 | 317 | else: |
| 293 | - fname_prefix = filename | |
| 318 | + base_dir = os.path.dirname(filename) | |
| 319 | + sane_fname = sanitize_filename(filename) | |
| 320 | + fname_prefix = os.path.join(base_dir, sane_fname) | |
| 294 | 321 | |
| 295 | 322 | # TODO: option to extract objects to files (false by default) |
| 296 | 323 | if data is None: |
| ... | ... | @@ -330,7 +357,8 @@ def process_file(container, filename, data, output_dir=None): |
| 330 | 357 | print 'Source path = %r' % opkg.src_path |
| 331 | 358 | print 'Temp path = %r' % opkg.temp_path |
| 332 | 359 | if opkg.filename: |
| 333 | - fname = '%s_%s' % (fname_prefix, opkg.filename) | |
| 360 | + fname = '%s_%s' % (fname_prefix, | |
| 361 | + sanitize_filename(opkg.filename)) | |
| 334 | 362 | else: |
| 335 | 363 | fname = '%s_object_%08X.noname' % (fname_prefix, index) |
| 336 | 364 | print 'saving to file %s' % fname | ... | ... |