Commit f4b2a30418f84885de190731ed5db39103eec6e7

Authored by Christian Herdtweck
1 parent 74b6c1ce

ooxml: Fixes from pylint and pep8

One of these was an actual bug: is_ooxml was missing its final `return True`, so it fell through and returned None instead of True.
Showing 1 changed file with 24 additions and 28 deletions
oletools/ooxml.py
... ... @@ -16,11 +16,11 @@ TODO: "xml2003" == "flatopc"?
16 16 """
17 17  
18 18 import sys
19   -from oletools.common.log_helper import log_helper
20 19 from zipfile import ZipFile, BadZipfile, is_zipfile
21 20 from os.path import splitext
22 21 import io
23 22 import re
  23 +from oletools.common.log_helper import log_helper
24 24  
25 25 # import lxml or ElementTree for XML parsing:
26 26 try:
... ... @@ -107,16 +107,14 @@ def debug_str(elem):
107 107 text = u', '.join(parts)
108 108 if len(text) > 150:
109 109 return text[:147] + u'...]'
110   - else:
111   - return text + u']'
  110 + return text + u']'
112 111  
113 112  
114 113 def isstr(some_var):
115 114 """ version-independent test for isinstance(some_var, (str, unicode)) """
116 115 if sys.version_info.major == 2:
117 116 return isinstance(some_var, basestring) # true for str and unicode
118   - else:
119   - return isinstance(some_var, str) # there is no unicode
  117 + return isinstance(some_var, str) # there is no unicode
120 118  
121 119  
122 120 ###############################################################################
... ... @@ -136,10 +134,9 @@ def get_type(filename):
136 134 prog_id = match.groups()[0]
137 135 if prog_id == WORD_XML_PROG_ID:
138 136 return DOCTYPE_WORD_XML
139   - elif prog_id == EXCEL_XML_PROG_ID:
  137 + if prog_id == EXCEL_XML_PROG_ID:
140 138 return DOCTYPE_EXCEL_XML
141   - else:
142   - return DOCTYPE_NONE
  139 + return DOCTYPE_NONE
143 140  
144 141 is_doc = False
145 142 is_xls = False
... ... @@ -169,9 +166,8 @@ def get_type(filename):
169 166 return DOCTYPE_POWERPOINT
170 167 if not is_doc and not is_xls and not is_ppt:
171 168 return DOCTYPE_NONE
172   - else:
173   - logger.warning('Encountered contradictory content types')
174   - return DOCTYPE_MIXED
  169 + logger.warning('Encountered contradictory content types')
  170 + return DOCTYPE_MIXED
175 171  
176 172  
177 173 def is_ooxml(filename):
... ... @@ -184,6 +180,7 @@ def is_ooxml(filename):
184 180 return False
185 181 if doctype == DOCTYPE_NONE:
186 182 return False
  183 + return True
187 184  
188 185  
189 186 ###############################################################################
... ... @@ -223,6 +220,7 @@ class ZipSubFile(object):
223 220 See also (and maybe could some day merge with):
224 221 ppt_record_parser.IterStream; also: oleobj.FakeFile
225 222 """
  223 + CHUNK_SIZE = 4096
226 224  
227 225 def __init__(self, container, filename, mode='r', size=None):
228 226 """ remember all necessary vars but do not open yet """
... ... @@ -260,7 +258,7 @@ class ZipSubFile(object):
260 258 # print('ZipSubFile: opened; size={}'.format(self.size))
261 259 return self
262 260  
263   - def write(self, *args, **kwargs): # pylint: disable=unused-argument,no-self-use
  261 + def write(self, *args, **kwargs):
264 262 """ write is not allowed """
265 263 raise IOError('writing not implemented')
266 264  
... ... @@ -318,10 +316,9 @@ class ZipSubFile(object):
318 316 """ helper for seek: skip forward by given amount using read() """
319 317 # print('ZipSubFile: seek by skipping {} bytes starting at {}'
320 318 # .format(self.pos, to_skip))
321   - CHUNK_SIZE = 4096
322   - n_chunks, leftover = divmod(to_skip, CHUNK_SIZE)
  319 + n_chunks, leftover = divmod(to_skip, self.CHUNK_SIZE)
323 320 for _ in range(n_chunks):
324   - self.read(CHUNK_SIZE) # just read and discard
  321 + self.read(self.CHUNK_SIZE) # just read and discard
325 322 self.read(leftover)
326 323 # print('ZipSubFile: seek by skipping done, pos now {}'
327 324 # .format(self.pos))
... ... @@ -424,8 +421,7 @@ class XmlParser(object):
424 421 if match:
425 422 self._is_single_xml = True
426 423 return True
427   - if not match:
428   - raise BadOOXML(self.filename, 'is no zip and has no prog_id')
  424 + raise BadOOXML(self.filename, 'is no zip and has no prog_id')
429 425  
430 426 def iter_files(self, args=None):
431 427 """ Find files in zip or just give single xml file """
... ... @@ -509,7 +505,7 @@ class XmlParser(object):
509 505 if event == 'start':
510 506 if elem.tag in want_tags:
511 507 logger.debug('remember start of tag {0} at {1}'
512   - .format(elem.tag, depth))
  508 + .format(elem.tag, depth))
513 509 inside_tags.append((elem.tag, depth))
514 510 depth += 1
515 511 continue
... ... @@ -525,18 +521,18 @@ class XmlParser(object):
525 521 inside_tags.pop()
526 522 else:
527 523 logger.error('found end for wanted tag {0} '
528   - 'but last start tag {1} does not'
529   - ' match'.format(curr_tag,
530   - inside_tags[-1]))
  524 + 'but last start tag {1} does not'
  525 + ' match'.format(curr_tag,
  526 + inside_tags[-1]))
531 527 # try to recover: close all deeper tags
532 528 while inside_tags and \
533 529 inside_tags[-1][1] >= depth:
534 530 logger.debug('recover: pop {0}'
535   - .format(inside_tags[-1]))
  531 + .format(inside_tags[-1]))
536 532 inside_tags.pop()
537 533 except IndexError: # no inside_tag[-1]
538 534 logger.error('found end of {0} at depth {1} but '
539   - 'no start event')
  535 + 'no start event')
540 536 # yield element
541 537 if is_wanted or not want_tags:
542 538 yield subfile, elem, depth
... ... @@ -581,15 +577,15 @@ class XmlParser(object):
581 577 if extension.startswith('.'):
582 578 extension = extension[1:]
583 579 defaults.append((extension, elem.attrib['ContentType']))
584   - logger.debug('found content type for extension {0[0]}: {0[1]}'
585   - .format(defaults[-1]))
  580 + logger.debug('found content type for extension {0[0]}: '
  581 + '{0[1]}'.format(defaults[-1]))
586 582 elif elem.tag.endswith('Override'):
587 583 subfile = elem.attrib['PartName']
588 584 if subfile.startswith('/'):
589 585 subfile = subfile[1:]
590 586 files.append((subfile, elem.attrib['ContentType']))
591   - logger.debug('found content type for subfile {0[0]}: {0[1]}'
592   - .format(files[-1]))
  587 + logger.debug('found content type for subfile {0[0]}: '
  588 + '{0[1]}'.format(files[-1]))
593 589 except BadOOXML as oo_err:
594 590 if oo_err.more_info.startswith('invalid subfile') and \
595 591 FILE_CONTENT_TYPES in oo_err.more_info:
... ... @@ -614,7 +610,7 @@ class XmlParser(object):
614 610 """
615 611 if not self.did_iter_all:
616 612 logger.warning('Did not iterate through complete file. '
617   - 'Should run iter_xml() without args, first.')
  613 + 'Should run iter_xml() without args, first.')
618 614 if not self.subfiles_no_xml:
619 615 return
620 616  
... ...