Commit 27c1bacab497283484afabe2bc47116d0976171a

Authored by Christian Herdtweck
Committed by Philippe Lagadec
1 parent 2142e7a6

msodde: Argument to select OOXML fields: dde, all, filtered

Showing 1 changed file with 48 additions and 8 deletions
oletools/msodde.py
@@ -51,8 +51,9 @@ from __future__ import print_function @@ -51,8 +51,9 @@ from __future__ import print_function
51 # 2017-10-25 CH: - add json output 51 # 2017-10-25 CH: - add json output
52 # 2017-10-25 CH: - parse doc 52 # 2017-10-25 CH: - parse doc
53 # PL: - added logging 53 # PL: - added logging
  54 +# 2017-11-10 CH: - added field blacklist and corresponding cmd line args
54 55
55 -__version__ = '0.52dev4' 56 +__version__ = '0.52dev5'
56 57
57 #------------------------------------------------------------------------------ 58 #------------------------------------------------------------------------------
58 # TODO: field codes can be in headers/footers/comments - parse these 59 # TODO: field codes can be in headers/footers/comments - parse these
@@ -181,6 +182,14 @@ FIELD_BLACKLIST = ( @@ -181,6 +182,14 @@ FIELD_BLACKLIST = (
181 ('USERNAME', 0, 1, '', '', 'string'), 182 ('USERNAME', 0, 1, '', '', 'string'),
182 ) 183 )
183 184
  185 +FIELD_DDE_REGEX = re.compile(r'^\s*dde(auto)?\s+', re.I)
  186 +
  187 +FIELD_FILTER_DDE = 'only dde'
  188 +FIELD_FILTER_BLACKLIST = 'exclude blacklisted'
  189 +FIELD_FILTER_ALL = 'keep all'
  190 +FIELD_FILTER_DEFAULT = FIELD_FILTER_BLACKLIST
  191 +
  192 +
184 # banner to be printed at program start 193 # banner to be printed at program start
185 BANNER = """msodde %s - http://decalage.info/python/oletools 194 BANNER = """msodde %s - http://decalage.info/python/oletools
186 THIS IS WORK IN PROGRESS - Check updates regularly! 195 THIS IS WORK IN PROGRESS - Check updates regularly!
@@ -316,11 +325,26 @@ def process_args(cmd_line_args=None): @@ -316,11 +325,26 @@ def process_args(cmd_line_args=None):
316 parser = ArgParserWithBanner(description='A python tool to detect and extract DDE links in MS Office files') 325 parser = ArgParserWithBanner(description='A python tool to detect and extract DDE links in MS Office files')
317 parser.add_argument("filepath", help="path of the file to be analyzed", 326 parser.add_argument("filepath", help="path of the file to be analyzed",
318 type=existing_file, metavar='FILE') 327 type=existing_file, metavar='FILE')
319 - parser.add_argument("--json", '-j', action='store_true', 328 + parser.add_argument('-j', "--json", action='store_true',
320 help="Output in json format. Do not use with -ldebug") 329 help="Output in json format. Do not use with -ldebug")
321 parser.add_argument("--nounquote", help="don't unquote values",action='store_true') 330 parser.add_argument("--nounquote", help="don't unquote values",action='store_true')
322 parser.add_argument('-l', '--loglevel', dest="loglevel", action="store", default=DEFAULT_LOG_LEVEL, 331 parser.add_argument('-l', '--loglevel', dest="loglevel", action="store", default=DEFAULT_LOG_LEVEL,
323 help="logging level debug/info/warning/error/critical (default=%(default)s)") 332 help="logging level debug/info/warning/error/critical (default=%(default)s)")
  333 + filter_group = parser.add_argument_group(
  334 + title='Filter which OpenXML field commands are returned',
  335 + description='Only applies to OpenXML (e.g. docx), not to OLE (e.g. '
  336 + '.doc). These options are mutually exclusive, last option '
  337 + 'found on command line overwrites earlier ones.')
  338 + filter_group.add_argument('-d', '--dde-only', action='store_const',
  339 + dest='field_filter_mode', const=FIELD_FILTER_DDE,
  340 + help='Return only DDE and DDEAUTO fields')
  341 + filter_group.add_argument('-f', '--filter', action='store_const',
  342 + dest='field_filter_mode', const=FIELD_FILTER_BLACKLIST,
  343 + help='Return all fields except harmless ones like PAGE')
  344 + filter_group.add_argument('-a', '--all-fields', action='store_const',
  345 + dest='field_filter_mode', const=FIELD_FILTER_ALL,
  346 + help='Return all fields, irrespective of their contents')
  347 + parser.set_defaults(field_filter_mode=FIELD_FILTER_DEFAULT)
324 348
325 return parser.parse_args(cmd_line_args) 349 return parser.parse_args(cmd_line_args)
326 350
@@ -469,7 +493,7 @@ def process_ole(filepath): @@ -469,7 +493,7 @@ def process_ole(filepath):
469 return u'\n'.join(text_parts) 493 return u'\n'.join(text_parts)
470 494
471 495
472 -def process_openxml(filepath): 496 +def process_openxml(filepath, field_filter_mode=None):
473 log.debug('process_openxml') 497 log.debug('process_openxml')
474 all_fields = [] 498 all_fields = []
475 z = zipfile.ZipFile(filepath) 499 z = zipfile.ZipFile(filepath)
@@ -483,7 +507,23 @@ def process_openxml(filepath): @@ -483,7 +507,23 @@ def process_openxml(filepath):
483 # print(f) 507 # print(f)
484 all_fields.extend(fields) 508 all_fields.extend(fields)
485 z.close() 509 z.close()
486 - return u'\n'.join(all_fields) 510 +
  511 + # apply field command filter
  512 + log.debug('filtering with mode "{0}"'.format(field_filter_mode))
  513 + if field_filter_mode in (FIELD_FILTER_ALL, None):
  514 + clean_fields = all_fields
  515 + elif field_filter_mode == FIELD_FILTER_DDE:
  516 + clean_fields = [field for field in all_fields
  517 + if FIELD_DDE_REGEX.match(field)]
  518 + elif field_filter_mode == FIELD_FILTER_BLACKLIST:
  519 + # drop blacklisted (i.e. known-harmless) fields, keep all others
  520 + clean_fields = [field for field in all_fields
  521 + if not field_is_blacklisted(field.strip())]
  522 + else:
  523 + raise ValueError('Unexpected field_filter_mode: "{0}"'
  524 + .format(field_filter_mode))
  525 +
  526 + return u'\n'.join(clean_fields)
487 527
488 def process_xml(data): 528 def process_xml(data):
489 # parse the XML data: 529 # parse the XML data:
@@ -532,7 +572,7 @@ def process_xml(data): @@ -532,7 +572,7 @@ def process_xml(data):
532 # concatenate the attribute of the field, if present: 572 # concatenate the attribute of the field, if present:
533 if elem.attrib is not None: 573 if elem.attrib is not None:
534 fields.append(elem.attrib[ATTR_W_INSTR]) 574 fields.append(elem.attrib[ATTR_W_INSTR])
535 - 575 +
536 return fields 576 return fields
537 577
538 def unquote(field): 578 def unquote(field):
@@ -637,12 +677,12 @@ def field_is_blacklisted(contents): @@ -637,12 +677,12 @@ def field_is_blacklisted(contents):
637 return True 677 return True
638 678
639 679
640 -def process_file(filepath): 680 +def process_file(filepath, field_filter_mode=None):
641 """ decides to either call process_openxml or process_ole """ 681 """ decides to either call process_openxml or process_ole """
642 if olefile.isOleFile(filepath): 682 if olefile.isOleFile(filepath):
643 return process_ole(filepath) 683 return process_ole(filepath)
644 else: 684 else:
645 - return process_openxml(filepath) 685 + return process_openxml(filepath, field_filter_mode)
646 686
647 687
648 #=== MAIN ================================================================= 688 #=== MAIN =================================================================
@@ -684,7 +724,7 @@ def main(cmd_line_args=None): @@ -684,7 +724,7 @@ def main(cmd_line_args=None):
684 text = '' 724 text = ''
685 return_code = 1 725 return_code = 1
686 try: 726 try:
687 - text = process_file(args.filepath) 727 + text = process_file(args.filepath, args.field_filter_mode)
688 return_code = 0 728 return_code = 0
689 except Exception as exc: 729 except Exception as exc:
690 if args.json: 730 if args.json: