Commit f4b2a30418f84885de190731ed5db39103eec6e7 (1 parent: 74b6c1ce)
ooxml: Fixes from pylint and pep8
One actually was an error (missing return from is_ooxml)
Showing 1 changed file with 24 additions and 28 deletions
oletools/ooxml.py
| ... | ... | @@ -16,11 +16,11 @@ TODO: "xml2003" == "flatopc"? |
| 16 | 16 | """ |
| 17 | 17 | |
| 18 | 18 | import sys |
| 19 | -from oletools.common.log_helper import log_helper | |
| 20 | 19 | from zipfile import ZipFile, BadZipfile, is_zipfile |
| 21 | 20 | from os.path import splitext |
| 22 | 21 | import io |
| 23 | 22 | import re |
| 23 | +from oletools.common.log_helper import log_helper | |
| 24 | 24 | |
| 25 | 25 | # import lxml or ElementTree for XML parsing: |
| 26 | 26 | try: |
| ... | ... | @@ -107,16 +107,14 @@ def debug_str(elem): |
| 107 | 107 | text = u', '.join(parts) |
| 108 | 108 | if len(text) > 150: |
| 109 | 109 | return text[:147] + u'...]' |
| 110 | - else: | |
| 111 | - return text + u']' | |
| 110 | + return text + u']' | |
| 112 | 111 | |
| 113 | 112 | |
| 114 | 113 | def isstr(some_var): |
| 115 | 114 | """ version-independent test for isinstance(some_var, (str, unicode)) """ |
| 116 | 115 | if sys.version_info.major == 2: |
| 117 | 116 | return isinstance(some_var, basestring) # true for str and unicode |
| 118 | - else: | |
| 119 | - return isinstance(some_var, str) # there is no unicode | |
| 117 | + return isinstance(some_var, str) # there is no unicode | |
| 120 | 118 | |
| 121 | 119 | |
| 122 | 120 | ############################################################################### |
| ... | ... | @@ -136,10 +134,9 @@ def get_type(filename): |
| 136 | 134 | prog_id = match.groups()[0] |
| 137 | 135 | if prog_id == WORD_XML_PROG_ID: |
| 138 | 136 | return DOCTYPE_WORD_XML |
| 139 | - elif prog_id == EXCEL_XML_PROG_ID: | |
| 137 | + if prog_id == EXCEL_XML_PROG_ID: | |
| 140 | 138 | return DOCTYPE_EXCEL_XML |
| 141 | - else: | |
| 142 | - return DOCTYPE_NONE | |
| 139 | + return DOCTYPE_NONE | |
| 143 | 140 | |
| 144 | 141 | is_doc = False |
| 145 | 142 | is_xls = False |
| ... | ... | @@ -169,9 +166,8 @@ def get_type(filename): |
| 169 | 166 | return DOCTYPE_POWERPOINT |
| 170 | 167 | if not is_doc and not is_xls and not is_ppt: |
| 171 | 168 | return DOCTYPE_NONE |
| 172 | - else: | |
| 173 | - logger.warning('Encountered contradictory content types') | |
| 174 | - return DOCTYPE_MIXED | |
| 169 | + logger.warning('Encountered contradictory content types') | |
| 170 | + return DOCTYPE_MIXED | |
| 175 | 171 | |
| 176 | 172 | |
| 177 | 173 | def is_ooxml(filename): |
| ... | ... | @@ -184,6 +180,7 @@ def is_ooxml(filename): |
| 184 | 180 | return False |
| 185 | 181 | if doctype == DOCTYPE_NONE: |
| 186 | 182 | return False |
| 183 | + return True | |
| 187 | 184 | |
| 188 | 185 | |
| 189 | 186 | ############################################################################### |
| ... | ... | @@ -223,6 +220,7 @@ class ZipSubFile(object): |
| 223 | 220 | See also (and maybe could some day merge with): |
| 224 | 221 | ppt_record_parser.IterStream; also: oleobj.FakeFile |
| 225 | 222 | """ |
| 223 | + CHUNK_SIZE = 4096 | |
| 226 | 224 | |
| 227 | 225 | def __init__(self, container, filename, mode='r', size=None): |
| 228 | 226 | """ remember all necessary vars but do not open yet """ |
| ... | ... | @@ -260,7 +258,7 @@ class ZipSubFile(object): |
| 260 | 258 | # print('ZipSubFile: opened; size={}'.format(self.size)) |
| 261 | 259 | return self |
| 262 | 260 | |
| 263 | - def write(self, *args, **kwargs): # pylint: disable=unused-argument,no-self-use | |
| 261 | + def write(self, *args, **kwargs): | |
| 264 | 262 | """ write is not allowed """ |
| 265 | 263 | raise IOError('writing not implemented') |
| 266 | 264 | |
| ... | ... | @@ -318,10 +316,9 @@ class ZipSubFile(object): |
| 318 | 316 | """ helper for seek: skip forward by given amount using read() """ |
| 319 | 317 | # print('ZipSubFile: seek by skipping {} bytes starting at {}' |
| 320 | 318 | # .format(self.pos, to_skip)) |
| 321 | - CHUNK_SIZE = 4096 | |
| 322 | - n_chunks, leftover = divmod(to_skip, CHUNK_SIZE) | |
| 319 | + n_chunks, leftover = divmod(to_skip, self.CHUNK_SIZE) | |
| 323 | 320 | for _ in range(n_chunks): |
| 324 | - self.read(CHUNK_SIZE) # just read and discard | |
| 321 | + self.read(self.CHUNK_SIZE) # just read and discard | |
| 325 | 322 | self.read(leftover) |
| 326 | 323 | # print('ZipSubFile: seek by skipping done, pos now {}' |
| 327 | 324 | # .format(self.pos)) |
| ... | ... | @@ -424,8 +421,7 @@ class XmlParser(object): |
| 424 | 421 | if match: |
| 425 | 422 | self._is_single_xml = True |
| 426 | 423 | return True |
| 427 | - if not match: | |
| 428 | - raise BadOOXML(self.filename, 'is no zip and has no prog_id') | |
| 424 | + raise BadOOXML(self.filename, 'is no zip and has no prog_id') | |
| 429 | 425 | |
| 430 | 426 | def iter_files(self, args=None): |
| 431 | 427 | """ Find files in zip or just give single xml file """ |
| ... | ... | @@ -509,7 +505,7 @@ class XmlParser(object): |
| 509 | 505 | if event == 'start': |
| 510 | 506 | if elem.tag in want_tags: |
| 511 | 507 | logger.debug('remember start of tag {0} at {1}' |
| 512 | - .format(elem.tag, depth)) | |
| 508 | + .format(elem.tag, depth)) | |
| 513 | 509 | inside_tags.append((elem.tag, depth)) |
| 514 | 510 | depth += 1 |
| 515 | 511 | continue |
| ... | ... | @@ -525,18 +521,18 @@ class XmlParser(object): |
| 525 | 521 | inside_tags.pop() |
| 526 | 522 | else: |
| 527 | 523 | logger.error('found end for wanted tag {0} ' |
| 528 | - 'but last start tag {1} does not' | |
| 529 | - ' match'.format(curr_tag, | |
| 530 | - inside_tags[-1])) | |
| 524 | + 'but last start tag {1} does not' | |
| 525 | + ' match'.format(curr_tag, | |
| 526 | + inside_tags[-1])) | |
| 531 | 527 | # try to recover: close all deeper tags |
| 532 | 528 | while inside_tags and \ |
| 533 | 529 | inside_tags[-1][1] >= depth: |
| 534 | 530 | logger.debug('recover: pop {0}' |
| 535 | - .format(inside_tags[-1])) | |
| 531 | + .format(inside_tags[-1])) | |
| 536 | 532 | inside_tags.pop() |
| 537 | 533 | except IndexError: # no inside_tag[-1] |
| 538 | 534 | logger.error('found end of {0} at depth {1} but ' |
| 539 | - 'no start event') | |
| 535 | + 'no start event') | |
| 540 | 536 | # yield element |
| 541 | 537 | if is_wanted or not want_tags: |
| 542 | 538 | yield subfile, elem, depth |
| ... | ... | @@ -581,15 +577,15 @@ class XmlParser(object): |
| 581 | 577 | if extension.startswith('.'): |
| 582 | 578 | extension = extension[1:] |
| 583 | 579 | defaults.append((extension, elem.attrib['ContentType'])) |
| 584 | - logger.debug('found content type for extension {0[0]}: {0[1]}' | |
| 585 | - .format(defaults[-1])) | |
| 580 | + logger.debug('found content type for extension {0[0]}: ' | |
| 581 | + '{0[1]}'.format(defaults[-1])) | |
| 586 | 582 | elif elem.tag.endswith('Override'): |
| 587 | 583 | subfile = elem.attrib['PartName'] |
| 588 | 584 | if subfile.startswith('/'): |
| 589 | 585 | subfile = subfile[1:] |
| 590 | 586 | files.append((subfile, elem.attrib['ContentType'])) |
| 591 | - logger.debug('found content type for subfile {0[0]}: {0[1]}' | |
| 592 | - .format(files[-1])) | |
| 587 | + logger.debug('found content type for subfile {0[0]}: ' | |
| 588 | + '{0[1]}'.format(files[-1])) | |
| 593 | 589 | except BadOOXML as oo_err: |
| 594 | 590 | if oo_err.more_info.startswith('invalid subfile') and \ |
| 595 | 591 | FILE_CONTENT_TYPES in oo_err.more_info: |
| ... | ... | @@ -614,7 +610,7 @@ class XmlParser(object): |
| 614 | 610 | """ |
| 615 | 611 | if not self.did_iter_all: |
| 616 | 612 | logger.warning('Did not iterate through complete file. ' |
| 617 | - 'Should run iter_xml() without args, first.') | |
| 613 | + 'Should run iter_xml() without args, first.') | |
| 618 | 614 | if not self.subfiles_no_xml: |
| 619 | 615 | return |
| 620 | 616 | ... | ... |