Commit f4b2a30418f84885de190731ed5db39103eec6e7

Authored by Christian Herdtweck
1 parent 74b6c1ce

ooxml: Fixes from pylint and pep8

One of the fixes addresses an actual bug: is_ooxml was missing its final `return True`.
Showing 1 changed file with 24 additions and 28 deletions
oletools/ooxml.py
@@ -16,11 +16,11 @@ TODO: "xml2003" == "flatopc"? @@ -16,11 +16,11 @@ TODO: "xml2003" == "flatopc"?
16 """ 16 """
17 17
18 import sys 18 import sys
19 -from oletools.common.log_helper import log_helper  
20 from zipfile import ZipFile, BadZipfile, is_zipfile 19 from zipfile import ZipFile, BadZipfile, is_zipfile
21 from os.path import splitext 20 from os.path import splitext
22 import io 21 import io
23 import re 22 import re
  23 +from oletools.common.log_helper import log_helper
24 24
25 # import lxml or ElementTree for XML parsing: 25 # import lxml or ElementTree for XML parsing:
26 try: 26 try:
@@ -107,16 +107,14 @@ def debug_str(elem): @@ -107,16 +107,14 @@ def debug_str(elem):
107 text = u', '.join(parts) 107 text = u', '.join(parts)
108 if len(text) > 150: 108 if len(text) > 150:
109 return text[:147] + u'...]' 109 return text[:147] + u'...]'
110 - else:  
111 - return text + u']' 110 + return text + u']'
112 111
113 112
114 def isstr(some_var): 113 def isstr(some_var):
115 """ version-independent test for isinstance(some_var, (str, unicode)) """ 114 """ version-independent test for isinstance(some_var, (str, unicode)) """
116 if sys.version_info.major == 2: 115 if sys.version_info.major == 2:
117 return isinstance(some_var, basestring) # true for str and unicode 116 return isinstance(some_var, basestring) # true for str and unicode
118 - else:  
119 - return isinstance(some_var, str) # there is no unicode 117 + return isinstance(some_var, str) # there is no unicode
120 118
121 119
122 ############################################################################### 120 ###############################################################################
@@ -136,10 +134,9 @@ def get_type(filename): @@ -136,10 +134,9 @@ def get_type(filename):
136 prog_id = match.groups()[0] 134 prog_id = match.groups()[0]
137 if prog_id == WORD_XML_PROG_ID: 135 if prog_id == WORD_XML_PROG_ID:
138 return DOCTYPE_WORD_XML 136 return DOCTYPE_WORD_XML
139 - elif prog_id == EXCEL_XML_PROG_ID: 137 + if prog_id == EXCEL_XML_PROG_ID:
140 return DOCTYPE_EXCEL_XML 138 return DOCTYPE_EXCEL_XML
141 - else:  
142 - return DOCTYPE_NONE 139 + return DOCTYPE_NONE
143 140
144 is_doc = False 141 is_doc = False
145 is_xls = False 142 is_xls = False
@@ -169,9 +166,8 @@ def get_type(filename): @@ -169,9 +166,8 @@ def get_type(filename):
169 return DOCTYPE_POWERPOINT 166 return DOCTYPE_POWERPOINT
170 if not is_doc and not is_xls and not is_ppt: 167 if not is_doc and not is_xls and not is_ppt:
171 return DOCTYPE_NONE 168 return DOCTYPE_NONE
172 - else:  
173 - logger.warning('Encountered contradictory content types')  
174 - return DOCTYPE_MIXED 169 + logger.warning('Encountered contradictory content types')
  170 + return DOCTYPE_MIXED
175 171
176 172
177 def is_ooxml(filename): 173 def is_ooxml(filename):
@@ -184,6 +180,7 @@ def is_ooxml(filename): @@ -184,6 +180,7 @@ def is_ooxml(filename):
184 return False 180 return False
185 if doctype == DOCTYPE_NONE: 181 if doctype == DOCTYPE_NONE:
186 return False 182 return False
  183 + return True
187 184
188 185
189 ############################################################################### 186 ###############################################################################
@@ -223,6 +220,7 @@ class ZipSubFile(object): @@ -223,6 +220,7 @@ class ZipSubFile(object):
223 See also (and maybe could some day merge with): 220 See also (and maybe could some day merge with):
224 ppt_record_parser.IterStream; also: oleobj.FakeFile 221 ppt_record_parser.IterStream; also: oleobj.FakeFile
225 """ 222 """
  223 + CHUNK_SIZE = 4096
226 224
227 def __init__(self, container, filename, mode='r', size=None): 225 def __init__(self, container, filename, mode='r', size=None):
228 """ remember all necessary vars but do not open yet """ 226 """ remember all necessary vars but do not open yet """
@@ -260,7 +258,7 @@ class ZipSubFile(object): @@ -260,7 +258,7 @@ class ZipSubFile(object):
260 # print('ZipSubFile: opened; size={}'.format(self.size)) 258 # print('ZipSubFile: opened; size={}'.format(self.size))
261 return self 259 return self
262 260
263 - def write(self, *args, **kwargs): # pylint: disable=unused-argument,no-self-use 261 + def write(self, *args, **kwargs):
264 """ write is not allowed """ 262 """ write is not allowed """
265 raise IOError('writing not implemented') 263 raise IOError('writing not implemented')
266 264
@@ -318,10 +316,9 @@ class ZipSubFile(object): @@ -318,10 +316,9 @@ class ZipSubFile(object):
318 """ helper for seek: skip forward by given amount using read() """ 316 """ helper for seek: skip forward by given amount using read() """
319 # print('ZipSubFile: seek by skipping {} bytes starting at {}' 317 # print('ZipSubFile: seek by skipping {} bytes starting at {}'
320 # .format(self.pos, to_skip)) 318 # .format(self.pos, to_skip))
321 - CHUNK_SIZE = 4096  
322 - n_chunks, leftover = divmod(to_skip, CHUNK_SIZE) 319 + n_chunks, leftover = divmod(to_skip, self.CHUNK_SIZE)
323 for _ in range(n_chunks): 320 for _ in range(n_chunks):
324 - self.read(CHUNK_SIZE) # just read and discard 321 + self.read(self.CHUNK_SIZE) # just read and discard
325 self.read(leftover) 322 self.read(leftover)
326 # print('ZipSubFile: seek by skipping done, pos now {}' 323 # print('ZipSubFile: seek by skipping done, pos now {}'
327 # .format(self.pos)) 324 # .format(self.pos))
@@ -424,8 +421,7 @@ class XmlParser(object): @@ -424,8 +421,7 @@ class XmlParser(object):
424 if match: 421 if match:
425 self._is_single_xml = True 422 self._is_single_xml = True
426 return True 423 return True
427 - if not match:  
428 - raise BadOOXML(self.filename, 'is no zip and has no prog_id') 424 + raise BadOOXML(self.filename, 'is no zip and has no prog_id')
429 425
430 def iter_files(self, args=None): 426 def iter_files(self, args=None):
431 """ Find files in zip or just give single xml file """ 427 """ Find files in zip or just give single xml file """
@@ -509,7 +505,7 @@ class XmlParser(object): @@ -509,7 +505,7 @@ class XmlParser(object):
509 if event == 'start': 505 if event == 'start':
510 if elem.tag in want_tags: 506 if elem.tag in want_tags:
511 logger.debug('remember start of tag {0} at {1}' 507 logger.debug('remember start of tag {0} at {1}'
512 - .format(elem.tag, depth)) 508 + .format(elem.tag, depth))
513 inside_tags.append((elem.tag, depth)) 509 inside_tags.append((elem.tag, depth))
514 depth += 1 510 depth += 1
515 continue 511 continue
@@ -525,18 +521,18 @@ class XmlParser(object): @@ -525,18 +521,18 @@ class XmlParser(object):
525 inside_tags.pop() 521 inside_tags.pop()
526 else: 522 else:
527 logger.error('found end for wanted tag {0} ' 523 logger.error('found end for wanted tag {0} '
528 - 'but last start tag {1} does not'  
529 - ' match'.format(curr_tag,  
530 - inside_tags[-1])) 524 + 'but last start tag {1} does not'
  525 + ' match'.format(curr_tag,
  526 + inside_tags[-1]))
531 # try to recover: close all deeper tags 527 # try to recover: close all deeper tags
532 while inside_tags and \ 528 while inside_tags and \
533 inside_tags[-1][1] >= depth: 529 inside_tags[-1][1] >= depth:
534 logger.debug('recover: pop {0}' 530 logger.debug('recover: pop {0}'
535 - .format(inside_tags[-1])) 531 + .format(inside_tags[-1]))
536 inside_tags.pop() 532 inside_tags.pop()
537 except IndexError: # no inside_tag[-1] 533 except IndexError: # no inside_tag[-1]
538 logger.error('found end of {0} at depth {1} but ' 534 logger.error('found end of {0} at depth {1} but '
539 - 'no start event') 535 + 'no start event')
540 # yield element 536 # yield element
541 if is_wanted or not want_tags: 537 if is_wanted or not want_tags:
542 yield subfile, elem, depth 538 yield subfile, elem, depth
@@ -581,15 +577,15 @@ class XmlParser(object): @@ -581,15 +577,15 @@ class XmlParser(object):
581 if extension.startswith('.'): 577 if extension.startswith('.'):
582 extension = extension[1:] 578 extension = extension[1:]
583 defaults.append((extension, elem.attrib['ContentType'])) 579 defaults.append((extension, elem.attrib['ContentType']))
584 - logger.debug('found content type for extension {0[0]}: {0[1]}'  
585 - .format(defaults[-1])) 580 + logger.debug('found content type for extension {0[0]}: '
  581 + '{0[1]}'.format(defaults[-1]))
586 elif elem.tag.endswith('Override'): 582 elif elem.tag.endswith('Override'):
587 subfile = elem.attrib['PartName'] 583 subfile = elem.attrib['PartName']
588 if subfile.startswith('/'): 584 if subfile.startswith('/'):
589 subfile = subfile[1:] 585 subfile = subfile[1:]
590 files.append((subfile, elem.attrib['ContentType'])) 586 files.append((subfile, elem.attrib['ContentType']))
591 - logger.debug('found content type for subfile {0[0]}: {0[1]}'  
592 - .format(files[-1])) 587 + logger.debug('found content type for subfile {0[0]}: '
  588 + '{0[1]}'.format(files[-1]))
593 except BadOOXML as oo_err: 589 except BadOOXML as oo_err:
594 if oo_err.more_info.startswith('invalid subfile') and \ 590 if oo_err.more_info.startswith('invalid subfile') and \
595 FILE_CONTENT_TYPES in oo_err.more_info: 591 FILE_CONTENT_TYPES in oo_err.more_info:
@@ -614,7 +610,7 @@ class XmlParser(object): @@ -614,7 +610,7 @@ class XmlParser(object):
614 """ 610 """
615 if not self.did_iter_all: 611 if not self.did_iter_all:
616 logger.warning('Did not iterate through complete file. ' 612 logger.warning('Did not iterate through complete file. '
617 - 'Should run iter_xml() without args, first.') 613 + 'Should run iter_xml() without args, first.')
618 if not self.subfiles_no_xml: 614 if not self.subfiles_no_xml:
619 return 615 return
620 616