Commit 43fc21ce645656989af755f8d27328b0999cbfca

Authored by Samir Aguiar
Committed by Christian Herdtweck
1 parent 6c43d28d

ooxml: use log helper

ooxml now uses our custom log helper instead of the
standard logging module, so that its log messages can
also be emitted as JSON when the calling tool is run
with the -j flag (JSON output enabled).
Showing 1 changed file with 23 additions and 19 deletions
oletools/ooxml.py
@@ -14,7 +14,7 @@ TODO: may have to tell apart single xml types: office2003 looks much different @@ -14,7 +14,7 @@ TODO: may have to tell apart single xml types: office2003 looks much different
14 """ 14 """
15 15
16 import sys 16 import sys
17 -import logging 17 +from oletools.util.log_helper import log_helper
18 from zipfile import ZipFile, BadZipfile, is_zipfile 18 from zipfile import ZipFile, BadZipfile, is_zipfile
19 from os.path import splitext 19 from os.path import splitext
20 import io 20 import io
@@ -27,6 +27,7 @@ try: @@ -27,6 +27,7 @@ try:
27 except ImportError: 27 except ImportError:
28 import xml.etree.cElementTree as ET 28 import xml.etree.cElementTree as ET
29 29
  30 +logger = log_helper.get_or_create_silent_logger('ooxml')
30 31
31 #: subfiles that have to be part of every ooxml file 32 #: subfiles that have to be part of every ooxml file
32 FILE_CONTENT_TYPES = '[Content_Types].xml' 33 FILE_CONTENT_TYPES = '[Content_Types].xml'
@@ -142,7 +143,7 @@ def get_type(filename): @@ -142,7 +143,7 @@ def get_type(filename):
142 is_xls = False 143 is_xls = False
143 is_ppt = False 144 is_ppt = False
144 for _, elem, _ in parser.iter_xml(FILE_CONTENT_TYPES): 145 for _, elem, _ in parser.iter_xml(FILE_CONTENT_TYPES):
145 - logging.debug(u' ' + debug_str(elem)) 146 + logger.debug(u' ' + debug_str(elem))
146 try: 147 try:
147 content_type = elem.attrib['ContentType'] 148 content_type = elem.attrib['ContentType']
148 except KeyError: # ContentType not an attr 149 except KeyError: # ContentType not an attr
@@ -160,7 +161,7 @@ def get_type(filename): @@ -160,7 +161,7 @@ def get_type(filename):
160 if not is_doc and not is_xls and not is_ppt: 161 if not is_doc and not is_xls and not is_ppt:
161 return DOCTYPE_NONE 162 return DOCTYPE_NONE
162 else: 163 else:
163 - logging.warning('Encountered contradictory content types') 164 + logger.warning('Encountered contradictory content types')
164 return DOCTYPE_MIXED 165 return DOCTYPE_MIXED
165 166
166 167
@@ -220,7 +221,7 @@ class ZipSubFile(object): @@ -220,7 +221,7 @@ class ZipSubFile(object):
220 self.name = filename 221 self.name = filename
221 if size is None: 222 if size is None:
222 self.size = container.getinfo(filename).file_size 223 self.size = container.getinfo(filename).file_size
223 - logging.debug('zip stream has size {0}'.format(self.size)) 224 + logger.debug('zip stream has size {0}'.format(self.size))
224 else: 225 else:
225 self.size = size 226 self.size = size
226 if 'w' in mode.lower(): 227 if 'w' in mode.lower():
@@ -484,10 +485,10 @@ class XmlParser(object): @@ -484,10 +485,10 @@ class XmlParser(object):
484 want_tags = [] 485 want_tags = []
485 elif isstr(tags): 486 elif isstr(tags):
486 want_tags = [tags, ] 487 want_tags = [tags, ]
487 - logging.debug('looking for tags: {0}'.format(tags)) 488 + logger.debug('looking for tags: {0}'.format(tags))
488 else: 489 else:
489 want_tags = tags 490 want_tags = tags
490 - logging.debug('looking for tags: {0}'.format(tags)) 491 + logger.debug('looking for tags: {0}'.format(tags))
491 492
492 for subfile, handle in self.iter_files(subfiles): 493 for subfile, handle in self.iter_files(subfiles):
493 events = ('start', 'end') 494 events = ('start', 'end')
@@ -499,7 +500,7 @@ class XmlParser(object): @@ -499,7 +500,7 @@ class XmlParser(object):
499 continue 500 continue
500 if event == 'start': 501 if event == 'start':
501 if elem.tag in want_tags: 502 if elem.tag in want_tags:
502 - logging.debug('remember start of tag {0} at {1}' 503 + logger.debug('remember start of tag {0} at {1}'
503 .format(elem.tag, depth)) 504 .format(elem.tag, depth))
504 inside_tags.append((elem.tag, depth)) 505 inside_tags.append((elem.tag, depth))
505 depth += 1 506 depth += 1
@@ -515,18 +516,18 @@ class XmlParser(object): @@ -515,18 +516,18 @@ class XmlParser(object):
515 if inside_tags[-1] == curr_tag: 516 if inside_tags[-1] == curr_tag:
516 inside_tags.pop() 517 inside_tags.pop()
517 else: 518 else:
518 - logging.error('found end for wanted tag {0} ' 519 + logger.error('found end for wanted tag {0} '
519 'but last start tag {1} does not' 520 'but last start tag {1} does not'
520 ' match'.format(curr_tag, 521 ' match'.format(curr_tag,
521 inside_tags[-1])) 522 inside_tags[-1]))
522 # try to recover: close all deeper tags 523 # try to recover: close all deeper tags
523 while inside_tags and \ 524 while inside_tags and \
524 inside_tags[-1][1] >= depth: 525 inside_tags[-1][1] >= depth:
525 - logging.debug('recover: pop {0}' 526 + logger.debug('recover: pop {0}'
526 .format(inside_tags[-1])) 527 .format(inside_tags[-1]))
527 inside_tags.pop() 528 inside_tags.pop()
528 except IndexError: # no inside_tag[-1] 529 except IndexError: # no inside_tag[-1]
529 - logging.error('found end of {0} at depth {1} but ' 530 + logger.error('found end of {0} at depth {1} but '
530 'no start event') 531 'no start event')
531 # yield element 532 # yield element
532 if is_wanted or not want_tags: 533 if is_wanted or not want_tags:
@@ -543,12 +544,12 @@ class XmlParser(object): @@ -543,12 +544,12 @@ class XmlParser(object):
543 if subfile is None: # this is no zip subfile but single xml 544 if subfile is None: # this is no zip subfile but single xml
544 raise BadOOXML(self.filename, 'is neither zip nor xml') 545 raise BadOOXML(self.filename, 'is neither zip nor xml')
545 elif subfile.endswith('.xml'): 546 elif subfile.endswith('.xml'):
546 - logger = logging.warning 547 + log = logger.warning
547 else: 548 else:
548 - logger = logging.debug  
549 - logger(' xml-parsing for {0} failed ({1}). '  
550 - .format(subfile, err) +  
551 - 'Run iter_non_xml to investigate.') 549 + log = logger.debug
  550 + log(' xml-parsing for {0} failed ({1}). '
  551 + .format(subfile, err) +
  552 + 'Run iter_non_xml to investigate.')
552 assert(depth == 0) 553 assert(depth == 0)
553 554
554 def get_content_types(self): 555 def get_content_types(self):
@@ -571,14 +572,14 @@ class XmlParser(object): @@ -571,14 +572,14 @@ class XmlParser(object):
571 if extension.startswith('.'): 572 if extension.startswith('.'):
572 extension = extension[1:] 573 extension = extension[1:]
573 defaults.append((extension, elem.attrib['ContentType'])) 574 defaults.append((extension, elem.attrib['ContentType']))
574 - logging.debug('found content type for extension {0[0]}: {0[1]}' 575 + logger.debug('found content type for extension {0[0]}: {0[1]}'
575 .format(defaults[-1])) 576 .format(defaults[-1]))
576 elif elem.tag.endswith('Override'): 577 elif elem.tag.endswith('Override'):
577 subfile = elem.attrib['PartName'] 578 subfile = elem.attrib['PartName']
578 if subfile.startswith('/'): 579 if subfile.startswith('/'):
579 subfile = subfile[1:] 580 subfile = subfile[1:]
580 files.append((subfile, elem.attrib['ContentType'])) 581 files.append((subfile, elem.attrib['ContentType']))
581 - logging.debug('found content type for subfile {0[0]}: {0[1]}' 582 + logger.debug('found content type for subfile {0[0]}: {0[1]}'
582 .format(files[-1])) 583 .format(files[-1]))
583 return dict(files), dict(defaults) 584 return dict(files), dict(defaults)
584 585
@@ -595,7 +596,7 @@ class XmlParser(object): @@ -595,7 +596,7 @@ class XmlParser(object):
595 To handle binary parts of an xlsb file, use xls_parser.parse_xlsb_part 596 To handle binary parts of an xlsb file, use xls_parser.parse_xlsb_part
596 """ 597 """
597 if not self.did_iter_all: 598 if not self.did_iter_all:
598 - logging.warning('Did not iterate through complete file. ' 599 + logger.warning('Did not iterate through complete file. '
599 'Should run iter_xml() without args, first.') 600 'Should run iter_xml() without args, first.')
600 if not self.subfiles_no_xml: 601 if not self.subfiles_no_xml:
601 return 602 return
@@ -628,7 +629,7 @@ def test(): @@ -628,7 +629,7 @@ def test():
628 629
629 see module doc for more info 630 see module doc for more info
630 """ 631 """
631 - logging.basicConfig(level=logging.DEBUG) 632 + log_helper.enable_logging(False, logger.DEBUG)
632 if len(sys.argv) != 2: 633 if len(sys.argv) != 2:
633 print(u'To test this code, give me a single file as arg') 634 print(u'To test this code, give me a single file as arg')
634 return 2 635 return 2
@@ -647,6 +648,9 @@ def test(): @@ -647,6 +648,9 @@ def test():
647 if index > 100: 648 if index > 100:
648 print(u'...') 649 print(u'...')
649 break 650 break
  651 +
  652 + log_helper.end_logging()
  653 +
650 return 0 654 return 0
651 655
652 656