Commit 27c1bacab497283484afabe2bc47116d0976171a
Committed by Philippe Lagadec
1 parent: 2142e7a6
msodde: Argument to select OOXML fields: dde, all, filtered
Showing 1 changed file with 48 additions and 8 deletions
oletools/msodde.py
| ... | ... | @@ -51,8 +51,9 @@ from __future__ import print_function |
| 51 | 51 | # 2017-10-25 CH: - add json output |
| 52 | 52 | # 2017-10-25 CH: - parse doc |
| 53 | 53 | # PL: - added logging |
| 54 | +# 2017-11-10 CH: - added field blacklist and corresponding cmd line args | |
| 54 | 55 | |
| 55 | -__version__ = '0.52dev4' | |
| 56 | +__version__ = '0.52dev5' | |
| 56 | 57 | |
| 57 | 58 | #------------------------------------------------------------------------------ |
| 58 | 59 | # TODO: field codes can be in headers/footers/comments - parse these |
| ... | ... | @@ -181,6 +182,14 @@ FIELD_BLACKLIST = ( |
| 181 | 182 | ('USERNAME', 0, 1, '', '', 'string'), |
| 182 | 183 | ) |
| 183 | 184 | |
| 185 | +FIELD_DDE_REGEX = re.compile(r'^\s*dde(auto)?\s+', re.I) | |
| 186 | + | |
| 187 | +FIELD_FILTER_DDE = 'only dde' | |
| 188 | +FIELD_FILTER_BLACKLIST = 'exclude blacklisted' | |
| 189 | +FIELD_FILTER_ALL = 'keep all' | |
| 190 | +FIELD_FILTER_DEFAULT = FIELD_FILTER_BLACKLIST | |
| 191 | + | |
| 192 | + | |
| 184 | 193 | # banner to be printed at program start |
| 185 | 194 | BANNER = """msodde %s - http://decalage.info/python/oletools |
| 186 | 195 | THIS IS WORK IN PROGRESS - Check updates regularly! |
| ... | ... | @@ -316,11 +325,26 @@ def process_args(cmd_line_args=None): |
| 316 | 325 | parser = ArgParserWithBanner(description='A python tool to detect and extract DDE links in MS Office files') |
| 317 | 326 | parser.add_argument("filepath", help="path of the file to be analyzed", |
| 318 | 327 | type=existing_file, metavar='FILE') |
| 319 | - parser.add_argument("--json", '-j', action='store_true', | |
| 328 | + parser.add_argument('-j', "--json", action='store_true', | |
| 320 | 329 | help="Output in json format. Do not use with -ldebug") |
| 321 | 330 | parser.add_argument("--nounquote", help="don't unquote values",action='store_true') |
| 322 | 331 | parser.add_argument('-l', '--loglevel', dest="loglevel", action="store", default=DEFAULT_LOG_LEVEL, |
| 323 | 332 | help="logging level debug/info/warning/error/critical (default=%(default)s)") |
| 333 | + filter_group = parser.add_argument_group( | |
| 334 | + title='Filter which OpenXML field commands are returned', | |
| 335 | + description='Only applies to OpenXML (e.g. docx), not to OLE (e.g. ' | |
| 336 | + '.doc). These options are mutually exclusive, last option ' | |
| 337 | + 'found on command line overwrites earlier ones.') | |
| 338 | + filter_group.add_argument('-d', '--dde-only', action='store_const', | |
| 339 | + dest='field_filter_mode', const=FIELD_FILTER_DDE, | |
| 340 | + help='Return only DDE and DDEAUTO fields') | |
| 341 | + filter_group.add_argument('-f', '--filter', action='store_const', | |
| 342 | + dest='field_filter_mode', const=FIELD_FILTER_BLACKLIST, | |
| 343 | + help='Return all fields except harmless ones like PAGE') | |
| 344 | + filter_group.add_argument('-a', '--all-fields', action='store_const', | |
| 345 | + dest='field_filter_mode', const=FIELD_FILTER_ALL, | |
| 346 | + help='Return all fields, irrespective of their contents') | |
| 347 | + parser.set_defaults(field_filter_mode=FIELD_FILTER_DEFAULT) | |
| 324 | 348 | |
| 325 | 349 | return parser.parse_args(cmd_line_args) |
| 326 | 350 | |
| ... | ... | @@ -469,7 +493,7 @@ def process_ole(filepath): |
| 469 | 493 | return u'\n'.join(text_parts) |
| 470 | 494 | |
| 471 | 495 | |
| 472 | -def process_openxml(filepath): | |
| 496 | +def process_openxml(filepath, field_filter_mode=None): | |
| 473 | 497 | log.debug('process_openxml') |
| 474 | 498 | all_fields = [] |
| 475 | 499 | z = zipfile.ZipFile(filepath) |
| ... | ... | @@ -483,7 +507,23 @@ def process_openxml(filepath): |
| 483 | 507 | # print(f) |
| 484 | 508 | all_fields.extend(fields) |
| 485 | 509 | z.close() |
| 486 | - return u'\n'.join(all_fields) | |
| 510 | + | |
| 511 | + # apply field command filter | |
| 512 | + log.debug('filtering with mode "{0}"'.format(field_filter_mode)) | |
| 513 | + if field_filter_mode in (FIELD_FILTER_ALL, None): | |
| 514 | + clean_fields = all_fields | |
| 515 | + elif field_filter_mode == FIELD_FILTER_DDE: | |
| 516 | + clean_fields = [field for field in all_fields | |
| 517 | + if FIELD_DDE_REGEX.match(field)] | |
| 518 | + elif field_filter_mode == FIELD_FILTER_BLACKLIST: | |
| 519 | + # check if fields are acceptable and should not be returned | |
| 520 | + clean_fields = [field for field in all_fields | |
| 521 | + if not field_is_blacklisted(field.strip())] | |
| 522 | + else: | |
| 523 | + raise ValueError('Unexpected field_filter_mode: "{0}"' | |
| 524 | + .format(field_filter_mode)) | |
| 525 | + | |
| 526 | + return u'\n'.join(clean_fields) | |
| 487 | 527 | |
| 488 | 528 | def process_xml(data): |
| 489 | 529 | # parse the XML data: |
| ... | ... | @@ -532,7 +572,7 @@ def process_xml(data): |
| 532 | 572 | # concatenate the attribute of the field, if present: |
| 533 | 573 | if elem.attrib is not None: |
| 534 | 574 | fields.append(elem.attrib[ATTR_W_INSTR]) |
| 535 | - | |
| 575 | + | |
| 536 | 576 | return fields |
| 537 | 577 | |
| 538 | 578 | def unquote(field): |
| ... | ... | @@ -637,12 +677,12 @@ def field_is_blacklisted(contents): |
| 637 | 677 | return True |
| 638 | 678 | |
| 639 | 679 | |
| 640 | -def process_file(filepath): | |
| 680 | +def process_file(filepath, field_filter_mode=None): | |
| 641 | 681 | """ decides to either call process_openxml or process_ole """ |
| 642 | 682 | if olefile.isOleFile(filepath): |
| 643 | 683 | return process_ole(filepath) |
| 644 | 684 | else: |
| 645 | - return process_openxml(filepath) | |
| 685 | + return process_openxml(filepath, field_filter_mode) | |
| 646 | 686 | |
| 647 | 687 | |
| 648 | 688 | #=== MAIN ================================================================= |
| ... | ... | @@ -684,7 +724,7 @@ def main(cmd_line_args=None): |
| 684 | 724 | text = '' |
| 685 | 725 | return_code = 1 |
| 686 | 726 | try: |
| 687 | - text = process_file(args.filepath) | |
| 727 | + text = process_file(args.filepath, args.field_filter_mode) | |
| 688 | 728 | return_code = 0 |
| 689 | 729 | except Exception as exc: |
| 690 | 730 | if args.json: | ... | ... |