Commit f4b2a30418f84885de190731ed5db39103eec6e7
1 parent
74b6c1ce
ooxml: Fixes from pylint and pep8
One of these was an actual bug: is_ooxml was missing its final `return True`.
Showing 1 changed file with 24 additions and 28 deletions
oletools/ooxml.py
| @@ -16,11 +16,11 @@ TODO: "xml2003" == "flatopc"? | @@ -16,11 +16,11 @@ TODO: "xml2003" == "flatopc"? | ||
| 16 | """ | 16 | """ |
| 17 | 17 | ||
| 18 | import sys | 18 | import sys |
| 19 | -from oletools.common.log_helper import log_helper | ||
| 20 | from zipfile import ZipFile, BadZipfile, is_zipfile | 19 | from zipfile import ZipFile, BadZipfile, is_zipfile |
| 21 | from os.path import splitext | 20 | from os.path import splitext |
| 22 | import io | 21 | import io |
| 23 | import re | 22 | import re |
| 23 | +from oletools.common.log_helper import log_helper | ||
| 24 | 24 | ||
| 25 | # import lxml or ElementTree for XML parsing: | 25 | # import lxml or ElementTree for XML parsing: |
| 26 | try: | 26 | try: |
| @@ -107,16 +107,14 @@ def debug_str(elem): | @@ -107,16 +107,14 @@ def debug_str(elem): | ||
| 107 | text = u', '.join(parts) | 107 | text = u', '.join(parts) |
| 108 | if len(text) > 150: | 108 | if len(text) > 150: |
| 109 | return text[:147] + u'...]' | 109 | return text[:147] + u'...]' |
| 110 | - else: | ||
| 111 | - return text + u']' | 110 | + return text + u']' |
| 112 | 111 | ||
| 113 | 112 | ||
| 114 | def isstr(some_var): | 113 | def isstr(some_var): |
| 115 | """ version-independent test for isinstance(some_var, (str, unicode)) """ | 114 | """ version-independent test for isinstance(some_var, (str, unicode)) """ |
| 116 | if sys.version_info.major == 2: | 115 | if sys.version_info.major == 2: |
| 117 | return isinstance(some_var, basestring) # true for str and unicode | 116 | return isinstance(some_var, basestring) # true for str and unicode |
| 118 | - else: | ||
| 119 | - return isinstance(some_var, str) # there is no unicode | 117 | + return isinstance(some_var, str) # there is no unicode |
| 120 | 118 | ||
| 121 | 119 | ||
| 122 | ############################################################################### | 120 | ############################################################################### |
| @@ -136,10 +134,9 @@ def get_type(filename): | @@ -136,10 +134,9 @@ def get_type(filename): | ||
| 136 | prog_id = match.groups()[0] | 134 | prog_id = match.groups()[0] |
| 137 | if prog_id == WORD_XML_PROG_ID: | 135 | if prog_id == WORD_XML_PROG_ID: |
| 138 | return DOCTYPE_WORD_XML | 136 | return DOCTYPE_WORD_XML |
| 139 | - elif prog_id == EXCEL_XML_PROG_ID: | 137 | + if prog_id == EXCEL_XML_PROG_ID: |
| 140 | return DOCTYPE_EXCEL_XML | 138 | return DOCTYPE_EXCEL_XML |
| 141 | - else: | ||
| 142 | - return DOCTYPE_NONE | 139 | + return DOCTYPE_NONE |
| 143 | 140 | ||
| 144 | is_doc = False | 141 | is_doc = False |
| 145 | is_xls = False | 142 | is_xls = False |
| @@ -169,9 +166,8 @@ def get_type(filename): | @@ -169,9 +166,8 @@ def get_type(filename): | ||
| 169 | return DOCTYPE_POWERPOINT | 166 | return DOCTYPE_POWERPOINT |
| 170 | if not is_doc and not is_xls and not is_ppt: | 167 | if not is_doc and not is_xls and not is_ppt: |
| 171 | return DOCTYPE_NONE | 168 | return DOCTYPE_NONE |
| 172 | - else: | ||
| 173 | - logger.warning('Encountered contradictory content types') | ||
| 174 | - return DOCTYPE_MIXED | 169 | + logger.warning('Encountered contradictory content types') |
| 170 | + return DOCTYPE_MIXED | ||
| 175 | 171 | ||
| 176 | 172 | ||
| 177 | def is_ooxml(filename): | 173 | def is_ooxml(filename): |
| @@ -184,6 +180,7 @@ def is_ooxml(filename): | @@ -184,6 +180,7 @@ def is_ooxml(filename): | ||
| 184 | return False | 180 | return False |
| 185 | if doctype == DOCTYPE_NONE: | 181 | if doctype == DOCTYPE_NONE: |
| 186 | return False | 182 | return False |
| 183 | + return True | ||
| 187 | 184 | ||
| 188 | 185 | ||
| 189 | ############################################################################### | 186 | ############################################################################### |
| @@ -223,6 +220,7 @@ class ZipSubFile(object): | @@ -223,6 +220,7 @@ class ZipSubFile(object): | ||
| 223 | See also (and maybe could some day merge with): | 220 | See also (and maybe could some day merge with): |
| 224 | ppt_record_parser.IterStream; also: oleobj.FakeFile | 221 | ppt_record_parser.IterStream; also: oleobj.FakeFile |
| 225 | """ | 222 | """ |
| 223 | + CHUNK_SIZE = 4096 | ||
| 226 | 224 | ||
| 227 | def __init__(self, container, filename, mode='r', size=None): | 225 | def __init__(self, container, filename, mode='r', size=None): |
| 228 | """ remember all necessary vars but do not open yet """ | 226 | """ remember all necessary vars but do not open yet """ |
| @@ -260,7 +258,7 @@ class ZipSubFile(object): | @@ -260,7 +258,7 @@ class ZipSubFile(object): | ||
| 260 | # print('ZipSubFile: opened; size={}'.format(self.size)) | 258 | # print('ZipSubFile: opened; size={}'.format(self.size)) |
| 261 | return self | 259 | return self |
| 262 | 260 | ||
| 263 | - def write(self, *args, **kwargs): # pylint: disable=unused-argument,no-self-use | 261 | + def write(self, *args, **kwargs): |
| 264 | """ write is not allowed """ | 262 | """ write is not allowed """ |
| 265 | raise IOError('writing not implemented') | 263 | raise IOError('writing not implemented') |
| 266 | 264 | ||
| @@ -318,10 +316,9 @@ class ZipSubFile(object): | @@ -318,10 +316,9 @@ class ZipSubFile(object): | ||
| 318 | """ helper for seek: skip forward by given amount using read() """ | 316 | """ helper for seek: skip forward by given amount using read() """ |
| 319 | # print('ZipSubFile: seek by skipping {} bytes starting at {}' | 317 | # print('ZipSubFile: seek by skipping {} bytes starting at {}' |
| 320 | # .format(self.pos, to_skip)) | 318 | # .format(self.pos, to_skip)) |
| 321 | - CHUNK_SIZE = 4096 | ||
| 322 | - n_chunks, leftover = divmod(to_skip, CHUNK_SIZE) | 319 | + n_chunks, leftover = divmod(to_skip, self.CHUNK_SIZE) |
| 323 | for _ in range(n_chunks): | 320 | for _ in range(n_chunks): |
| 324 | - self.read(CHUNK_SIZE) # just read and discard | 321 | + self.read(self.CHUNK_SIZE) # just read and discard |
| 325 | self.read(leftover) | 322 | self.read(leftover) |
| 326 | # print('ZipSubFile: seek by skipping done, pos now {}' | 323 | # print('ZipSubFile: seek by skipping done, pos now {}' |
| 327 | # .format(self.pos)) | 324 | # .format(self.pos)) |
| @@ -424,8 +421,7 @@ class XmlParser(object): | @@ -424,8 +421,7 @@ class XmlParser(object): | ||
| 424 | if match: | 421 | if match: |
| 425 | self._is_single_xml = True | 422 | self._is_single_xml = True |
| 426 | return True | 423 | return True |
| 427 | - if not match: | ||
| 428 | - raise BadOOXML(self.filename, 'is no zip and has no prog_id') | 424 | + raise BadOOXML(self.filename, 'is no zip and has no prog_id') |
| 429 | 425 | ||
| 430 | def iter_files(self, args=None): | 426 | def iter_files(self, args=None): |
| 431 | """ Find files in zip or just give single xml file """ | 427 | """ Find files in zip or just give single xml file """ |
| @@ -509,7 +505,7 @@ class XmlParser(object): | @@ -509,7 +505,7 @@ class XmlParser(object): | ||
| 509 | if event == 'start': | 505 | if event == 'start': |
| 510 | if elem.tag in want_tags: | 506 | if elem.tag in want_tags: |
| 511 | logger.debug('remember start of tag {0} at {1}' | 507 | logger.debug('remember start of tag {0} at {1}' |
| 512 | - .format(elem.tag, depth)) | 508 | + .format(elem.tag, depth)) |
| 513 | inside_tags.append((elem.tag, depth)) | 509 | inside_tags.append((elem.tag, depth)) |
| 514 | depth += 1 | 510 | depth += 1 |
| 515 | continue | 511 | continue |
| @@ -525,18 +521,18 @@ class XmlParser(object): | @@ -525,18 +521,18 @@ class XmlParser(object): | ||
| 525 | inside_tags.pop() | 521 | inside_tags.pop() |
| 526 | else: | 522 | else: |
| 527 | logger.error('found end for wanted tag {0} ' | 523 | logger.error('found end for wanted tag {0} ' |
| 528 | - 'but last start tag {1} does not' | ||
| 529 | - ' match'.format(curr_tag, | ||
| 530 | - inside_tags[-1])) | 524 | + 'but last start tag {1} does not' |
| 525 | + ' match'.format(curr_tag, | ||
| 526 | + inside_tags[-1])) | ||
| 531 | # try to recover: close all deeper tags | 527 | # try to recover: close all deeper tags |
| 532 | while inside_tags and \ | 528 | while inside_tags and \ |
| 533 | inside_tags[-1][1] >= depth: | 529 | inside_tags[-1][1] >= depth: |
| 534 | logger.debug('recover: pop {0}' | 530 | logger.debug('recover: pop {0}' |
| 535 | - .format(inside_tags[-1])) | 531 | + .format(inside_tags[-1])) |
| 536 | inside_tags.pop() | 532 | inside_tags.pop() |
| 537 | except IndexError: # no inside_tag[-1] | 533 | except IndexError: # no inside_tag[-1] |
| 538 | logger.error('found end of {0} at depth {1} but ' | 534 | logger.error('found end of {0} at depth {1} but ' |
| 539 | - 'no start event') | 535 | + 'no start event') |
| 540 | # yield element | 536 | # yield element |
| 541 | if is_wanted or not want_tags: | 537 | if is_wanted or not want_tags: |
| 542 | yield subfile, elem, depth | 538 | yield subfile, elem, depth |
| @@ -581,15 +577,15 @@ class XmlParser(object): | @@ -581,15 +577,15 @@ class XmlParser(object): | ||
| 581 | if extension.startswith('.'): | 577 | if extension.startswith('.'): |
| 582 | extension = extension[1:] | 578 | extension = extension[1:] |
| 583 | defaults.append((extension, elem.attrib['ContentType'])) | 579 | defaults.append((extension, elem.attrib['ContentType'])) |
| 584 | - logger.debug('found content type for extension {0[0]}: {0[1]}' | ||
| 585 | - .format(defaults[-1])) | 580 | + logger.debug('found content type for extension {0[0]}: ' |
| 581 | + '{0[1]}'.format(defaults[-1])) | ||
| 586 | elif elem.tag.endswith('Override'): | 582 | elif elem.tag.endswith('Override'): |
| 587 | subfile = elem.attrib['PartName'] | 583 | subfile = elem.attrib['PartName'] |
| 588 | if subfile.startswith('/'): | 584 | if subfile.startswith('/'): |
| 589 | subfile = subfile[1:] | 585 | subfile = subfile[1:] |
| 590 | files.append((subfile, elem.attrib['ContentType'])) | 586 | files.append((subfile, elem.attrib['ContentType'])) |
| 591 | - logger.debug('found content type for subfile {0[0]}: {0[1]}' | ||
| 592 | - .format(files[-1])) | 587 | + logger.debug('found content type for subfile {0[0]}: ' |
| 588 | + '{0[1]}'.format(files[-1])) | ||
| 593 | except BadOOXML as oo_err: | 589 | except BadOOXML as oo_err: |
| 594 | if oo_err.more_info.startswith('invalid subfile') and \ | 590 | if oo_err.more_info.startswith('invalid subfile') and \ |
| 595 | FILE_CONTENT_TYPES in oo_err.more_info: | 591 | FILE_CONTENT_TYPES in oo_err.more_info: |
| @@ -614,7 +610,7 @@ class XmlParser(object): | @@ -614,7 +610,7 @@ class XmlParser(object): | ||
| 614 | """ | 610 | """ |
| 615 | if not self.did_iter_all: | 611 | if not self.did_iter_all: |
| 616 | logger.warning('Did not iterate through complete file. ' | 612 | logger.warning('Did not iterate through complete file. ' |
| 617 | - 'Should run iter_xml() without args, first.') | 613 | + 'Should run iter_xml() without args, first.') |
| 618 | if not self.subfiles_no_xml: | 614 | if not self.subfiles_no_xml: |
| 619 | return | 615 | return |
| 620 | 616 |