Commit 27c1bacab497283484afabe2bc47116d0976171a
Committed by Philippe Lagadec
1 parent: 2142e7a6
msodde: Argument to select OOXML fields: dde, all, filtered
Showing 1 changed file with 48 additions and 8 deletions
oletools/msodde.py
| @@ -51,8 +51,9 @@ from __future__ import print_function | @@ -51,8 +51,9 @@ from __future__ import print_function | ||
| 51 | # 2017-10-25 CH: - add json output | 51 | # 2017-10-25 CH: - add json output |
| 52 | # 2017-10-25 CH: - parse doc | 52 | # 2017-10-25 CH: - parse doc |
| 53 | # PL: - added logging | 53 | # PL: - added logging |
| 54 | +# 2017-11-10 CH: - added field blacklist and corresponding cmd line args | ||
| 54 | 55 | ||
| 55 | -__version__ = '0.52dev4' | 56 | +__version__ = '0.52dev5' |
| 56 | 57 | ||
| 57 | #------------------------------------------------------------------------------ | 58 | #------------------------------------------------------------------------------ |
| 58 | # TODO: field codes can be in headers/footers/comments - parse these | 59 | # TODO: field codes can be in headers/footers/comments - parse these |
| @@ -181,6 +182,14 @@ FIELD_BLACKLIST = ( | @@ -181,6 +182,14 @@ FIELD_BLACKLIST = ( | ||
| 181 | ('USERNAME', 0, 1, '', '', 'string'), | 182 | ('USERNAME', 0, 1, '', '', 'string'), |
| 182 | ) | 183 | ) |
| 183 | 184 | ||
| 185 | +FIELD_DDE_REGEX = re.compile(r'^\s*dde(auto)?\s+', re.I) | ||
| 186 | + | ||
| 187 | +FIELD_FILTER_DDE = 'only dde' | ||
| 188 | +FIELD_FILTER_BLACKLIST = 'exclude blacklisted' | ||
| 189 | +FIELD_FILTER_ALL = 'keep all' | ||
| 190 | +FIELD_FILTER_DEFAULT = FIELD_FILTER_BLACKLIST | ||
| 191 | + | ||
| 192 | + | ||
| 184 | # banner to be printed at program start | 193 | # banner to be printed at program start |
| 185 | BANNER = """msodde %s - http://decalage.info/python/oletools | 194 | BANNER = """msodde %s - http://decalage.info/python/oletools |
| 186 | THIS IS WORK IN PROGRESS - Check updates regularly! | 195 | THIS IS WORK IN PROGRESS - Check updates regularly! |
| @@ -316,11 +325,26 @@ def process_args(cmd_line_args=None): | @@ -316,11 +325,26 @@ def process_args(cmd_line_args=None): | ||
| 316 | parser = ArgParserWithBanner(description='A python tool to detect and extract DDE links in MS Office files') | 325 | parser = ArgParserWithBanner(description='A python tool to detect and extract DDE links in MS Office files') |
| 317 | parser.add_argument("filepath", help="path of the file to be analyzed", | 326 | parser.add_argument("filepath", help="path of the file to be analyzed", |
| 318 | type=existing_file, metavar='FILE') | 327 | type=existing_file, metavar='FILE') |
| 319 | - parser.add_argument("--json", '-j', action='store_true', | 328 | + parser.add_argument('-j', "--json", action='store_true', |
| 320 | help="Output in json format. Do not use with -ldebug") | 329 | help="Output in json format. Do not use with -ldebug") |
| 321 | parser.add_argument("--nounquote", help="don't unquote values",action='store_true') | 330 | parser.add_argument("--nounquote", help="don't unquote values",action='store_true') |
| 322 | parser.add_argument('-l', '--loglevel', dest="loglevel", action="store", default=DEFAULT_LOG_LEVEL, | 331 | parser.add_argument('-l', '--loglevel', dest="loglevel", action="store", default=DEFAULT_LOG_LEVEL, |
| 323 | help="logging level debug/info/warning/error/critical (default=%(default)s)") | 332 | help="logging level debug/info/warning/error/critical (default=%(default)s)") |
| 333 | + filter_group = parser.add_argument_group( | ||
| 334 | + title='Filter which OpenXML field commands are returned', | ||
| 335 | + description='Only applies to OpenXML (e.g. docx), not to OLE (e.g. ' | ||
| 336 | + '.doc). These options are mutually exclusive, last option ' | ||
| 337 | + 'found on command line overwrites earlier ones.') | ||
| 338 | + filter_group.add_argument('-d', '--dde-only', action='store_const', | ||
| 339 | + dest='field_filter_mode', const=FIELD_FILTER_DDE, | ||
| 340 | + help='Return only DDE and DDEAUTO fields') | ||
| 341 | + filter_group.add_argument('-f', '--filter', action='store_const', | ||
| 342 | + dest='field_filter_mode', const=FIELD_FILTER_BLACKLIST, | ||
| 343 | + help='Return all fields except harmless ones like PAGE') | ||
| 344 | + filter_group.add_argument('-a', '--all-fields', action='store_const', | ||
| 345 | + dest='field_filter_mode', const=FIELD_FILTER_ALL, | ||
| 346 | + help='Return all fields, irrespective of their contents') | ||
| 347 | + parser.set_defaults(field_filter_mode=FIELD_FILTER_DEFAULT) | ||
| 324 | 348 | ||
| 325 | return parser.parse_args(cmd_line_args) | 349 | return parser.parse_args(cmd_line_args) |
| 326 | 350 | ||
| @@ -469,7 +493,7 @@ def process_ole(filepath): | @@ -469,7 +493,7 @@ def process_ole(filepath): | ||
| 469 | return u'\n'.join(text_parts) | 493 | return u'\n'.join(text_parts) |
| 470 | 494 | ||
| 471 | 495 | ||
| 472 | -def process_openxml(filepath): | 496 | +def process_openxml(filepath, field_filter_mode=None): |
| 473 | log.debug('process_openxml') | 497 | log.debug('process_openxml') |
| 474 | all_fields = [] | 498 | all_fields = [] |
| 475 | z = zipfile.ZipFile(filepath) | 499 | z = zipfile.ZipFile(filepath) |
| @@ -483,7 +507,23 @@ def process_openxml(filepath): | @@ -483,7 +507,23 @@ def process_openxml(filepath): | ||
| 483 | # print(f) | 507 | # print(f) |
| 484 | all_fields.extend(fields) | 508 | all_fields.extend(fields) |
| 485 | z.close() | 509 | z.close() |
| 486 | - return u'\n'.join(all_fields) | 510 | + |
| 511 | + # apply field command filter | ||
| 512 | + log.debug('filtering with mode "{0}"'.format(field_filter_mode)) | ||
| 513 | + if field_filter_mode in (FIELD_FILTER_ALL, None): | ||
| 514 | + clean_fields = all_fields | ||
| 515 | + elif field_filter_mode == FIELD_FILTER_DDE: | ||
| 516 | + clean_fields = [field for field in all_fields | ||
| 517 | + if FIELD_DDE_REGEX.match(field)] | ||
| 518 | + elif field_filter_mode == FIELD_FILTER_BLACKLIST: | ||
| 519 | + # check if fields are acceptable and should not be returned | ||
| 520 | + clean_fields = [field for field in all_fields | ||
| 521 | + if not field_is_blacklisted(field.strip())] | ||
| 522 | + else: | ||
| 523 | + raise ValueError('Unexpected field_filter_mode: "{0}"' | ||
| 524 | + .format(field_filter_mode)) | ||
| 525 | + | ||
| 526 | + return u'\n'.join(clean_fields) | ||
| 487 | 527 | ||
| 488 | def process_xml(data): | 528 | def process_xml(data): |
| 489 | # parse the XML data: | 529 | # parse the XML data: |
| @@ -532,7 +572,7 @@ def process_xml(data): | @@ -532,7 +572,7 @@ def process_xml(data): | ||
| 532 | # concatenate the attribute of the field, if present: | 572 | # concatenate the attribute of the field, if present: |
| 533 | if elem.attrib is not None: | 573 | if elem.attrib is not None: |
| 534 | fields.append(elem.attrib[ATTR_W_INSTR]) | 574 | fields.append(elem.attrib[ATTR_W_INSTR]) |
| 535 | - | 575 | + |
| 536 | return fields | 576 | return fields |
| 537 | 577 | ||
| 538 | def unquote(field): | 578 | def unquote(field): |
| @@ -637,12 +677,12 @@ def field_is_blacklisted(contents): | @@ -637,12 +677,12 @@ def field_is_blacklisted(contents): | ||
| 637 | return True | 677 | return True |
| 638 | 678 | ||
| 639 | 679 | ||
| 640 | -def process_file(filepath): | 680 | +def process_file(filepath, field_filter_mode=None): |
| 641 | """ decides to either call process_openxml or process_ole """ | 681 | """ decides to either call process_openxml or process_ole """ |
| 642 | if olefile.isOleFile(filepath): | 682 | if olefile.isOleFile(filepath): |
| 643 | return process_ole(filepath) | 683 | return process_ole(filepath) |
| 644 | else: | 684 | else: |
| 645 | - return process_openxml(filepath) | 685 | + return process_openxml(filepath, field_filter_mode) |
| 646 | 686 | ||
| 647 | 687 | ||
| 648 | #=== MAIN ================================================================= | 688 | #=== MAIN ================================================================= |
| @@ -684,7 +724,7 @@ def main(cmd_line_args=None): | @@ -684,7 +724,7 @@ def main(cmd_line_args=None): | ||
| 684 | text = '' | 724 | text = '' |
| 685 | return_code = 1 | 725 | return_code = 1 |
| 686 | try: | 726 | try: |
| 687 | - text = process_file(args.filepath) | 727 | + text = process_file(args.filepath, args.field_filter_mode) |
| 688 | return_code = 0 | 728 | return_code = 0 |
| 689 | except Exception as exc: | 729 | except Exception as exc: |
| 690 | if args.json: | 730 | if args.json: |