Commit 26a592fb1b983ff7dbf599f01ffc784f4223d18a

Authored by Christian Herdtweck
1 parent 6028d9ab

msodde: clean up code following PEP 8 and pylint (2)

- move imports further up
- simplify code for oletools import hack
- make a few variable names longer
Showing 1 changed file with 47 additions and 45 deletions
oletools/msodde.py
... ... @@ -43,9 +43,40 @@ http://www.decalage.info/python/oletools
43 43 # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
44 44 # POSSIBILITY OF SUCH DAMAGE.
45 45  
  46 +# -- IMPORTS ------------------------------------------------------------------
  47 +
46 48 from __future__ import print_function
47 49  
48   -#------------------------------------------------------------------------------
  50 +import argparse
  51 +import zipfile
  52 +import os
  53 +from os.path import abspath, dirname
  54 +import sys
  55 +import json
  56 +import logging
  57 +import re
  58 +import csv
  59 +
  60 +# import lxml or ElementTree for XML parsing:
  61 +try:
  62 + # lxml: best performance for XML processing
  63 + import lxml.etree as ET
  64 +except ImportError:
  65 + import xml.etree.cElementTree as ET
  66 +
  67 +# little hack to allow absolute imports even if oletools is not installed
  68 +# Copied from olevba.py
  69 +PARENT_DIR = dirname(dirname(abspath(__file__)))
  70 +if PARENT_DIR not in sys.path:
  71 + sys.path.insert(0, PARENT_DIR)
  72 +del PARENT_DIR
  73 +
  74 +from oletools.thirdparty import olefile
  75 +from oletools import ooxml
  76 +from oletools import xls_parser
  77 +from oletools import rtfobj
  78 +
  79 +# -----------------------------------------------------------------------------
49 80 # CHANGELOG:
50 81 # 2017-10-18 v0.52 PL: - first version
51 82 # 2017-10-20 PL: - fixed issue #202 (handling empty xml tags)
... ... @@ -77,36 +108,6 @@ __version__ = '0.52dev9'
77 108 # REFERENCES:
78 109  
79 110  
80   -#--- IMPORTS ------------------------------------------------------------------
81   -
82   -import argparse
83   -import zipfile
84   -import os
85   -import sys
86   -import json
87   -import logging
88   -import re
89   -from struct import unpack
90   -
91   -# import lxml or ElementTree for XML parsing:
92   -try:
93   - # lxml: best performance for XML processing
94   - import lxml.etree as ET
95   -except ImportError:
96   - import xml.etree.cElementTree as ET
97   -
98   -# little hack to allow absolute imports even if oletools is not installed
99   -# Copied from olevba.py
100   -_thismodule_dir = os.path.normpath(os.path.abspath(os.path.dirname(__file__)))
101   -_parent_dir = os.path.normpath(os.path.join(_thismodule_dir, '..'))
102   -if not _parent_dir in sys.path:
103   - sys.path.insert(0, _parent_dir)
104   -
105   -from oletools.thirdparty import olefile
106   -from oletools import ooxml
107   -from oletools import xls_parser
108   -from oletools import rtfobj
109   -
110 111 # === PYTHON 2+3 SUPPORT ======================================================
111 112  
112 113 if sys.version_info[0] >= 3:
... ... @@ -548,10 +549,10 @@ def process_docx(filepath, field_filter_mode=None):
548 549 """ find dde-links (and other fields) in Word 2007+ files """
549 550 log.debug('process_docx')
550 551 all_fields = []
551   - with zipfile.ZipFile(filepath) as z:
552   - for filepath in z.namelist():
  552 + with zipfile.ZipFile(filepath) as zipper:
  553 + for filepath in zipper.namelist():
553 554 if filepath in LOCATIONS:
554   - data = z.read(filepath)
  555 + data = zipper.read(filepath)
555 556 fields = process_xml(data)
556 557 if len(fields) > 0:
557 558 all_fields.extend(fields)
... ... @@ -590,16 +591,17 @@ def process_xml(data):
590 591  
591 592 for subs in root.iter(TAG_W_P):
592 593 elem = None
593   - for e in subs:
594   - if e.tag == TAG_W_R:
595   - for child in e:
596   - if child.tag == TAG_W_FLDCHAR or child.tag == TAG_W_INSTRTEXT:
  594 + for curr_elem in subs:
597 595 # check if w:r; parse children to pull out first FLDCHAR/INSTRTEXT
  596 + if curr_elem.tag == TAG_W_R:
  597 + for child in curr_elem:
  598 + if child.tag == TAG_W_FLDCHAR or \
  599 + child.tag == TAG_W_INSTRTEXT:
598 600 elem = child
599 601 break
600 602 else:
601   - elem = e
602   - #this should be an error condition
  603 + elem = curr_elem
  604 + # this should be an error condition
603 605 if elem is None:
604 606 continue
605 607  
... ... @@ -633,12 +635,12 @@ def unquote(field):
633 635 # split into components
634 636 parts = field.strip().split(" ")
635 637 ddestr = ""
636   - for p in parts[1:]:
637   - try:
638   - ch = chr(int(p))
  638 + for part in parts[1:]:
  639 + try:
  640 + character = chr(int(part))
639 641 except ValueError:
640   - ch = p
641   - ddestr += ch
  642 + character = part
  643 + ddestr += character
642 644 return ddestr
643 645  
644 646  
... ...