Commit 455c85b476cfc36d40355bb99d94076bd7c37f41

Authored by Philippe Lagadec
1 parent 209688eb

rtfobj: sanitize filenames to avoid special characters

Showing 1 changed file with 31 additions and 3 deletions
oletools/rtfobj.py
@@ -50,6 +50,7 @@ http://www.decalage.info/python/oletools @@ -50,6 +50,7 @@ http://www.decalage.info/python/oletools
50 # 2016-04-07 v0.45 PL: - improved parsing to handle some malware tricks 50 # 2016-04-07 v0.45 PL: - improved parsing to handle some malware tricks
51 # 2016-05-06 v0.47 TJ: - added option -d to set the output directory 51 # 2016-05-06 v0.47 TJ: - added option -d to set the output directory
52 # (contribution by Thomas Jarosch) 52 # (contribution by Thomas Jarosch)
  53 +# TJ: - sanitize filenames to avoid special characters
53 54
54 __version__ = '0.47' 55 __version__ = '0.47'
55 56
@@ -282,15 +283,41 @@ def rtf_iter_objects (data, min_size=32): @@ -282,15 +283,41 @@ def rtf_iter_objects (data, min_size=32):
282 match = re_hexblock.search(data, pos=current) 283 match = re_hexblock.search(data, pos=current)
283 284
284 285
  286 +
def sanitize_filename(filename, replacement='_', max_length=200):
    """Return a safe basename derived from *filename*.

    Only the last path component of *filename* is kept (directories are
    dropped), surrounding whitespace is stripped, and every character that
    is not a word character, dot, dash or space is replaced.

    :param filename: str, file name or path to sanitize (may be untrusted,
        e.g. a name stored inside an embedded OLE Package object)
    :param replacement: str, text substituted for each non-whitelisted
        character
    :param max_length: int, maximum length of the result; a false value
        (0 or None) disables truncation
    :returns: str, sanitized basename; 'NONAME' (subject to truncation)
        when nothing usable is left after sanitization
    """
    basepath = os.path.basename(filename).strip()
    sane_fname = re.sub(r'[^\w\.\- ]', replacement, basepath)

    # Collapse runs of dots so the result can never contain '..'
    while '..' in sane_fname:
        sane_fname = sane_fname.replace('..', '.')

    # Collapse runs of spaces into a single space. The search pattern must
    # be TWO spaces: replacing one space by one space would loop forever.
    while '  ' in sane_fname:
        sane_fname = sane_fname.replace('  ', ' ')

    # Test the sanitized result rather than the raw input: a path such as
    # 'dir/' or a whitespace-only name is non-empty but has an empty
    # basename, and an empty file name must never be returned.
    if not sane_fname:
        sane_fname = 'NONAME'

    # limit filename length
    if max_length:
        sane_fname = sane_fname[:max_length]

    return sane_fname
  307 +
  308 +
285 def process_file(container, filename, data, output_dir=None): 309 def process_file(container, filename, data, output_dir=None):
286 if output_dir: 310 if output_dir:
287 if not os.path.isdir(output_dir): 311 if not os.path.isdir(output_dir):
288 log.info('creating output directory %s' % output_dir) 312 log.info('creating output directory %s' % output_dir)
289 os.mkdir(output_dir) 313 os.mkdir(output_dir)
290 314
291 - fname_prefix = os.path.join(output_dir, os.path.basename(filename)) 315 + fname_prefix = os.path.join(output_dir,
  316 + sanitize_filename(filename))
292 else: 317 else:
293 - fname_prefix = filename 318 + base_dir = os.path.dirname(filename)
  319 + sane_fname = sanitize_filename(filename)
  320 + fname_prefix = os.path.join(base_dir, sane_fname)
294 321
295 # TODO: option to extract objects to files (false by default) 322 # TODO: option to extract objects to files (false by default)
296 if data is None: 323 if data is None:
@@ -330,7 +357,8 @@ def process_file(container, filename, data, output_dir=None): @@ -330,7 +357,8 @@ def process_file(container, filename, data, output_dir=None):
330 print 'Source path = %r' % opkg.src_path 357 print 'Source path = %r' % opkg.src_path
331 print 'Temp path = %r' % opkg.temp_path 358 print 'Temp path = %r' % opkg.temp_path
332 if opkg.filename: 359 if opkg.filename:
333 - fname = '%s_%s' % (fname_prefix, opkg.filename) 360 + fname = '%s_%s' % (fname_prefix,
  361 + sanitize_filename(opkg.filename))
334 else: 362 else:
335 fname = '%s_object_%08X.noname' % (fname_prefix, index) 363 fname = '%s_object_%08X.noname' % (fname_prefix, index)
336 print 'saving to file %s' % fname 364 print 'saving to file %s' % fname