Commit 27c1bacab497283484afabe2bc47116d0976171a

Authored by Christian Herdtweck
Committed by Philippe Lagadec
1 parent 2142e7a6

msodde: Argument to select OOXML fields: dde, all, filtered

Showing 1 changed file with 48 additions and 8 deletions
oletools/msodde.py
... ... @@ -51,8 +51,9 @@ from __future__ import print_function
51 51 # 2017-10-25 CH: - add json output
52 52 # 2017-10-25 CH: - parse doc
53 53 # PL: - added logging
  54 +# 2017-11-10 CH: - added field blacklist and corresponding cmd line args
54 55  
55   -__version__ = '0.52dev4'
  56 +__version__ = '0.52dev5'
56 57  
57 58 #------------------------------------------------------------------------------
58 59 # TODO: field codes can be in headers/footers/comments - parse these
... ... @@ -181,6 +182,14 @@ FIELD_BLACKLIST = (
181 182 ('USERNAME', 0, 1, '', '', 'string'),
182 183 )
183 184  
  185 +FIELD_DDE_REGEX = re.compile(r'^\s*dde(auto)?\s+', re.I)
  186 +
  187 +FIELD_FILTER_DDE = 'only dde'
  188 +FIELD_FILTER_BLACKLIST = 'exclude blacklisted'
  189 +FIELD_FILTER_ALL = 'keep all'
  190 +FIELD_FILTER_DEFAULT = FIELD_FILTER_BLACKLIST
  191 +
  192 +
184 193 # banner to be printed at program start
185 194 BANNER = """msodde %s - http://decalage.info/python/oletools
186 195 THIS IS WORK IN PROGRESS - Check updates regularly!
... ... @@ -316,11 +325,26 @@ def process_args(cmd_line_args=None):
316 325 parser = ArgParserWithBanner(description='A python tool to detect and extract DDE links in MS Office files')
317 326 parser.add_argument("filepath", help="path of the file to be analyzed",
318 327 type=existing_file, metavar='FILE')
319   - parser.add_argument("--json", '-j', action='store_true',
  328 + parser.add_argument('-j', "--json", action='store_true',
320 329 help="Output in json format. Do not use with -ldebug")
321 330 parser.add_argument("--nounquote", help="don't unquote values",action='store_true')
322 331 parser.add_argument('-l', '--loglevel', dest="loglevel", action="store", default=DEFAULT_LOG_LEVEL,
323 332 help="logging level debug/info/warning/error/critical (default=%(default)s)")
  333 + filter_group = parser.add_argument_group(
  334 + title='Filter which OpenXML field commands are returned',
  335 + description='Only applies to OpenXML (e.g. docx), not to OLE (e.g. '
  336 + '.doc). These options are mutually exclusive, last option '
  337 + 'found on command line overwrites earlier ones.')
  338 + filter_group.add_argument('-d', '--dde-only', action='store_const',
  339 + dest='field_filter_mode', const=FIELD_FILTER_DDE,
  340 + help='Return only DDE and DDEAUTO fields')
  341 + filter_group.add_argument('-f', '--filter', action='store_const',
  342 + dest='field_filter_mode', const=FIELD_FILTER_BLACKLIST,
  343 + help='Return all fields except harmless ones like PAGE')
  344 + filter_group.add_argument('-a', '--all-fields', action='store_const',
  345 + dest='field_filter_mode', const=FIELD_FILTER_ALL,
  346 + help='Return all fields, irrespective of their contents')
  347 + parser.set_defaults(field_filter_mode=FIELD_FILTER_DEFAULT)
324 348  
325 349 return parser.parse_args(cmd_line_args)
326 350  
... ... @@ -469,7 +493,7 @@ def process_ole(filepath):
469 493 return u'\n'.join(text_parts)
470 494  
471 495  
472   -def process_openxml(filepath):
  496 +def process_openxml(filepath, field_filter_mode=None):
473 497 log.debug('process_openxml')
474 498 all_fields = []
475 499 z = zipfile.ZipFile(filepath)
... ... @@ -483,7 +507,23 @@ def process_openxml(filepath):
483 507 # print(f)
484 508 all_fields.extend(fields)
485 509 z.close()
486   - return u'\n'.join(all_fields)
  510 +
  511 + # apply field command filter
  512 + log.debug('filtering with mode "{0}"'.format(field_filter_mode))
  513 + if field_filter_mode in (FIELD_FILTER_ALL, None):
  514 + clean_fields = all_fields
  515 + elif field_filter_mode == FIELD_FILTER_DDE:
  516 + clean_fields = [field for field in all_fields
  517 + if FIELD_DDE_REGEX.match(field)]
  518 + elif field_filter_mode == FIELD_FILTER_BLACKLIST:
  519 + # drop blacklisted (harmless) fields; only the remaining fields are returned
  520 + clean_fields = [field for field in all_fields
  521 + if not field_is_blacklisted(field.strip())]
  522 + else:
  523 + raise ValueError('Unexpected field_filter_mode: "{0}"'
  524 + .format(field_filter_mode))
  525 +
  526 + return u'\n'.join(clean_fields)
487 527  
488 528 def process_xml(data):
489 529 # parse the XML data:
... ... @@ -532,7 +572,7 @@ def process_xml(data):
532 572 # concatenate the attribute of the field, if present:
533 573 if elem.attrib is not None:
534 574 fields.append(elem.attrib[ATTR_W_INSTR])
535   -
  575 +
536 576 return fields
537 577  
538 578 def unquote(field):
... ... @@ -637,12 +677,12 @@ def field_is_blacklisted(contents):
637 677 return True
638 678  
639 679  
640   -def process_file(filepath):
  680 +def process_file(filepath, field_filter_mode=None):
641 681 """ decides to either call process_openxml or process_ole """
642 682 if olefile.isOleFile(filepath):
643 683 return process_ole(filepath)
644 684 else:
645   - return process_openxml(filepath)
  685 + return process_openxml(filepath, field_filter_mode)
646 686  
647 687  
648 688 #=== MAIN =================================================================
... ... @@ -684,7 +724,7 @@ def main(cmd_line_args=None):
684 724 text = ''
685 725 return_code = 1
686 726 try:
687   - text = process_file(args.filepath)
  727 + text = process_file(args.filepath, args.field_filter_mode)
688 728 return_code = 0
689 729 except Exception as exc:
690 730 if args.json:
... ...