Commit 43fc21ce645656989af755f8d27328b0999cbfca
Committed by Christian Herdtweck
1 parent: 6c43d28d
ooxml: use log helper
ooxml was changed to use our custom log helper so that it can also emit its log messages as JSON whenever a caller is run with the -j flag (i.e. has JSON output enabled).
Showing 1 changed file with 23 additions and 19 deletions
oletools/ooxml.py
| @@ -14,7 +14,7 @@ TODO: may have to tell apart single xml types: office2003 looks much different | @@ -14,7 +14,7 @@ TODO: may have to tell apart single xml types: office2003 looks much different | ||
| 14 | """ | 14 | """ |
| 15 | 15 | ||
| 16 | import sys | 16 | import sys |
| 17 | -import logging | 17 | +from oletools.util.log_helper import log_helper |
| 18 | from zipfile import ZipFile, BadZipfile, is_zipfile | 18 | from zipfile import ZipFile, BadZipfile, is_zipfile |
| 19 | from os.path import splitext | 19 | from os.path import splitext |
| 20 | import io | 20 | import io |
| @@ -27,6 +27,7 @@ try: | @@ -27,6 +27,7 @@ try: | ||
| 27 | except ImportError: | 27 | except ImportError: |
| 28 | import xml.etree.cElementTree as ET | 28 | import xml.etree.cElementTree as ET |
| 29 | 29 | ||
| 30 | +logger = log_helper.get_or_create_silent_logger('ooxml') | ||
| 30 | 31 | ||
| 31 | #: subfiles that have to be part of every ooxml file | 32 | #: subfiles that have to be part of every ooxml file |
| 32 | FILE_CONTENT_TYPES = '[Content_Types].xml' | 33 | FILE_CONTENT_TYPES = '[Content_Types].xml' |
| @@ -142,7 +143,7 @@ def get_type(filename): | @@ -142,7 +143,7 @@ def get_type(filename): | ||
| 142 | is_xls = False | 143 | is_xls = False |
| 143 | is_ppt = False | 144 | is_ppt = False |
| 144 | for _, elem, _ in parser.iter_xml(FILE_CONTENT_TYPES): | 145 | for _, elem, _ in parser.iter_xml(FILE_CONTENT_TYPES): |
| 145 | - logging.debug(u' ' + debug_str(elem)) | 146 | + logger.debug(u' ' + debug_str(elem)) |
| 146 | try: | 147 | try: |
| 147 | content_type = elem.attrib['ContentType'] | 148 | content_type = elem.attrib['ContentType'] |
| 148 | except KeyError: # ContentType not an attr | 149 | except KeyError: # ContentType not an attr |
| @@ -160,7 +161,7 @@ def get_type(filename): | @@ -160,7 +161,7 @@ def get_type(filename): | ||
| 160 | if not is_doc and not is_xls and not is_ppt: | 161 | if not is_doc and not is_xls and not is_ppt: |
| 161 | return DOCTYPE_NONE | 162 | return DOCTYPE_NONE |
| 162 | else: | 163 | else: |
| 163 | - logging.warning('Encountered contradictory content types') | 164 | + logger.warning('Encountered contradictory content types') |
| 164 | return DOCTYPE_MIXED | 165 | return DOCTYPE_MIXED |
| 165 | 166 | ||
| 166 | 167 | ||
| @@ -220,7 +221,7 @@ class ZipSubFile(object): | @@ -220,7 +221,7 @@ class ZipSubFile(object): | ||
| 220 | self.name = filename | 221 | self.name = filename |
| 221 | if size is None: | 222 | if size is None: |
| 222 | self.size = container.getinfo(filename).file_size | 223 | self.size = container.getinfo(filename).file_size |
| 223 | - logging.debug('zip stream has size {0}'.format(self.size)) | 224 | + logger.debug('zip stream has size {0}'.format(self.size)) |
| 224 | else: | 225 | else: |
| 225 | self.size = size | 226 | self.size = size |
| 226 | if 'w' in mode.lower(): | 227 | if 'w' in mode.lower(): |
| @@ -484,10 +485,10 @@ class XmlParser(object): | @@ -484,10 +485,10 @@ class XmlParser(object): | ||
| 484 | want_tags = [] | 485 | want_tags = [] |
| 485 | elif isstr(tags): | 486 | elif isstr(tags): |
| 486 | want_tags = [tags, ] | 487 | want_tags = [tags, ] |
| 487 | - logging.debug('looking for tags: {0}'.format(tags)) | 488 | + logger.debug('looking for tags: {0}'.format(tags)) |
| 488 | else: | 489 | else: |
| 489 | want_tags = tags | 490 | want_tags = tags |
| 490 | - logging.debug('looking for tags: {0}'.format(tags)) | 491 | + logger.debug('looking for tags: {0}'.format(tags)) |
| 491 | 492 | ||
| 492 | for subfile, handle in self.iter_files(subfiles): | 493 | for subfile, handle in self.iter_files(subfiles): |
| 493 | events = ('start', 'end') | 494 | events = ('start', 'end') |
| @@ -499,7 +500,7 @@ class XmlParser(object): | @@ -499,7 +500,7 @@ class XmlParser(object): | ||
| 499 | continue | 500 | continue |
| 500 | if event == 'start': | 501 | if event == 'start': |
| 501 | if elem.tag in want_tags: | 502 | if elem.tag in want_tags: |
| 502 | - logging.debug('remember start of tag {0} at {1}' | 503 | + logger.debug('remember start of tag {0} at {1}' |
| 503 | .format(elem.tag, depth)) | 504 | .format(elem.tag, depth)) |
| 504 | inside_tags.append((elem.tag, depth)) | 505 | inside_tags.append((elem.tag, depth)) |
| 505 | depth += 1 | 506 | depth += 1 |
| @@ -515,18 +516,18 @@ class XmlParser(object): | @@ -515,18 +516,18 @@ class XmlParser(object): | ||
| 515 | if inside_tags[-1] == curr_tag: | 516 | if inside_tags[-1] == curr_tag: |
| 516 | inside_tags.pop() | 517 | inside_tags.pop() |
| 517 | else: | 518 | else: |
| 518 | - logging.error('found end for wanted tag {0} ' | 519 | + logger.error('found end for wanted tag {0} ' |
| 519 | 'but last start tag {1} does not' | 520 | 'but last start tag {1} does not' |
| 520 | ' match'.format(curr_tag, | 521 | ' match'.format(curr_tag, |
| 521 | inside_tags[-1])) | 522 | inside_tags[-1])) |
| 522 | # try to recover: close all deeper tags | 523 | # try to recover: close all deeper tags |
| 523 | while inside_tags and \ | 524 | while inside_tags and \ |
| 524 | inside_tags[-1][1] >= depth: | 525 | inside_tags[-1][1] >= depth: |
| 525 | - logging.debug('recover: pop {0}' | 526 | + logger.debug('recover: pop {0}' |
| 526 | .format(inside_tags[-1])) | 527 | .format(inside_tags[-1])) |
| 527 | inside_tags.pop() | 528 | inside_tags.pop() |
| 528 | except IndexError: # no inside_tag[-1] | 529 | except IndexError: # no inside_tag[-1] |
| 529 | - logging.error('found end of {0} at depth {1} but ' | 530 | + logger.error('found end of {0} at depth {1} but ' |
| 530 | 'no start event') | 531 | 'no start event') |
| 531 | # yield element | 532 | # yield element |
| 532 | if is_wanted or not want_tags: | 533 | if is_wanted or not want_tags: |
| @@ -543,12 +544,12 @@ class XmlParser(object): | @@ -543,12 +544,12 @@ class XmlParser(object): | ||
| 543 | if subfile is None: # this is no zip subfile but single xml | 544 | if subfile is None: # this is no zip subfile but single xml |
| 544 | raise BadOOXML(self.filename, 'is neither zip nor xml') | 545 | raise BadOOXML(self.filename, 'is neither zip nor xml') |
| 545 | elif subfile.endswith('.xml'): | 546 | elif subfile.endswith('.xml'): |
| 546 | - logger = logging.warning | 547 | + log = logger.warning |
| 547 | else: | 548 | else: |
| 548 | - logger = logging.debug | ||
| 549 | - logger(' xml-parsing for {0} failed ({1}). ' | ||
| 550 | - .format(subfile, err) + | ||
| 551 | - 'Run iter_non_xml to investigate.') | 549 | + log = logger.debug |
| 550 | + log(' xml-parsing for {0} failed ({1}). ' | ||
| 551 | + .format(subfile, err) + | ||
| 552 | + 'Run iter_non_xml to investigate.') | ||
| 552 | assert(depth == 0) | 553 | assert(depth == 0) |
| 553 | 554 | ||
| 554 | def get_content_types(self): | 555 | def get_content_types(self): |
| @@ -571,14 +572,14 @@ class XmlParser(object): | @@ -571,14 +572,14 @@ class XmlParser(object): | ||
| 571 | if extension.startswith('.'): | 572 | if extension.startswith('.'): |
| 572 | extension = extension[1:] | 573 | extension = extension[1:] |
| 573 | defaults.append((extension, elem.attrib['ContentType'])) | 574 | defaults.append((extension, elem.attrib['ContentType'])) |
| 574 | - logging.debug('found content type for extension {0[0]}: {0[1]}' | 575 | + logger.debug('found content type for extension {0[0]}: {0[1]}' |
| 575 | .format(defaults[-1])) | 576 | .format(defaults[-1])) |
| 576 | elif elem.tag.endswith('Override'): | 577 | elif elem.tag.endswith('Override'): |
| 577 | subfile = elem.attrib['PartName'] | 578 | subfile = elem.attrib['PartName'] |
| 578 | if subfile.startswith('/'): | 579 | if subfile.startswith('/'): |
| 579 | subfile = subfile[1:] | 580 | subfile = subfile[1:] |
| 580 | files.append((subfile, elem.attrib['ContentType'])) | 581 | files.append((subfile, elem.attrib['ContentType'])) |
| 581 | - logging.debug('found content type for subfile {0[0]}: {0[1]}' | 582 | + logger.debug('found content type for subfile {0[0]}: {0[1]}' |
| 582 | .format(files[-1])) | 583 | .format(files[-1])) |
| 583 | return dict(files), dict(defaults) | 584 | return dict(files), dict(defaults) |
| 584 | 585 | ||
| @@ -595,7 +596,7 @@ class XmlParser(object): | @@ -595,7 +596,7 @@ class XmlParser(object): | ||
| 595 | To handle binary parts of an xlsb file, use xls_parser.parse_xlsb_part | 596 | To handle binary parts of an xlsb file, use xls_parser.parse_xlsb_part |
| 596 | """ | 597 | """ |
| 597 | if not self.did_iter_all: | 598 | if not self.did_iter_all: |
| 598 | - logging.warning('Did not iterate through complete file. ' | 599 | + logger.warning('Did not iterate through complete file. ' |
| 599 | 'Should run iter_xml() without args, first.') | 600 | 'Should run iter_xml() without args, first.') |
| 600 | if not self.subfiles_no_xml: | 601 | if not self.subfiles_no_xml: |
| 601 | return | 602 | return |
| @@ -628,7 +629,7 @@ def test(): | @@ -628,7 +629,7 @@ def test(): | ||
| 628 | 629 | ||
| 629 | see module doc for more info | 630 | see module doc for more info |
| 630 | """ | 631 | """ |
| 631 | - logging.basicConfig(level=logging.DEBUG) | 632 | + log_helper.enable_logging(False, logger.DEBUG) |
| 632 | if len(sys.argv) != 2: | 633 | if len(sys.argv) != 2: |
| 633 | print(u'To test this code, give me a single file as arg') | 634 | print(u'To test this code, give me a single file as arg') |
| 634 | return 2 | 635 | return 2 |
| @@ -647,6 +648,9 @@ def test(): | @@ -647,6 +648,9 @@ def test(): | ||
| 647 | if index > 100: | 648 | if index > 100: |
| 648 | print(u'...') | 649 | print(u'...') |
| 649 | break | 650 | break |
| 651 | + | ||
| 652 | + log_helper.end_logging() | ||
| 653 | + | ||
| 650 | return 0 | 654 | return 0 |
| 651 | 655 | ||
| 652 | 656 |