Commit 26a592fb1b983ff7dbf599f01ffc784f4223d18a

Authored by Christian Herdtweck
1 parent 6028d9ab

msodde: clean up code following pep8 and pylint (2)

- move imports further up
- simplify code for oletools import hack
- rename short variables to descriptive names (z -> zipper, e -> curr_elem, p -> part, ch -> character)
Showing 1 changed file with 47 additions and 45 deletions
oletools/msodde.py
@@ -43,9 +43,40 @@ http://www.decalage.info/python/oletools @@ -43,9 +43,40 @@ http://www.decalage.info/python/oletools
43 # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 43 # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
44 # POSSIBILITY OF SUCH DAMAGE. 44 # POSSIBILITY OF SUCH DAMAGE.
45 45
  46 +# -- IMPORTS ------------------------------------------------------------------
  47 +
46 from __future__ import print_function 48 from __future__ import print_function
47 49
48 -#------------------------------------------------------------------------------ 50 +import argparse
  51 +import zipfile
  52 +import os
  53 +from os.path import abspath, dirname
  54 +import sys
  55 +import json
  56 +import logging
  57 +import re
  58 +import csv
  59 +
  60 +# import lxml or ElementTree for XML parsing:
  61 +try:
  62 + # lxml: best performance for XML processing
  63 + import lxml.etree as ET
  64 +except ImportError:
  65 + import xml.etree.cElementTree as ET
  66 +
  67 +# little hack to allow absolute imports even if oletools is not installed
  68 +# Copied from olevba.py
  69 +PARENT_DIR = dirname(dirname(abspath(__file__)))
  70 +if PARENT_DIR not in sys.path:
  71 + sys.path.insert(0, PARENT_DIR)
  72 +del PARENT_DIR
  73 +
  74 +from oletools.thirdparty import olefile
  75 +from oletools import ooxml
  76 +from oletools import xls_parser
  77 +from oletools import rtfobj
  78 +
  79 +# -----------------------------------------------------------------------------
49 # CHANGELOG: 80 # CHANGELOG:
50 # 2017-10-18 v0.52 PL: - first version 81 # 2017-10-18 v0.52 PL: - first version
51 # 2017-10-20 PL: - fixed issue #202 (handling empty xml tags) 82 # 2017-10-20 PL: - fixed issue #202 (handling empty xml tags)
@@ -77,36 +108,6 @@ __version__ = '0.52dev9' @@ -77,36 +108,6 @@ __version__ = '0.52dev9'
77 # REFERENCES: 108 # REFERENCES:
78 109
79 110
80 -#--- IMPORTS ------------------------------------------------------------------  
81 -  
82 -import argparse  
83 -import zipfile  
84 -import os  
85 -import sys  
86 -import json  
87 -import logging  
88 -import re  
89 -from struct import unpack  
90 -  
91 -# import lxml or ElementTree for XML parsing:  
92 -try:  
93 - # lxml: best performance for XML processing  
94 - import lxml.etree as ET  
95 -except ImportError:  
96 - import xml.etree.cElementTree as ET  
97 -  
98 -# little hack to allow absolute imports even if oletools is not installed  
99 -# Copied from olevba.py  
100 -_thismodule_dir = os.path.normpath(os.path.abspath(os.path.dirname(__file__)))  
101 -_parent_dir = os.path.normpath(os.path.join(_thismodule_dir, '..'))  
102 -if not _parent_dir in sys.path:  
103 - sys.path.insert(0, _parent_dir)  
104 -  
105 -from oletools.thirdparty import olefile  
106 -from oletools import ooxml  
107 -from oletools import xls_parser  
108 -from oletools import rtfobj  
109 -  
110 # === PYTHON 2+3 SUPPORT ====================================================== 111 # === PYTHON 2+3 SUPPORT ======================================================
111 112
112 if sys.version_info[0] >= 3: 113 if sys.version_info[0] >= 3:
@@ -548,10 +549,10 @@ def process_docx(filepath, field_filter_mode=None): @@ -548,10 +549,10 @@ def process_docx(filepath, field_filter_mode=None):
548 """ find dde-links (and other fields) in Word 2007+ files """ 549 """ find dde-links (and other fields) in Word 2007+ files """
549 log.debug('process_docx') 550 log.debug('process_docx')
550 all_fields = [] 551 all_fields = []
551 - with zipfile.ZipFile(filepath) as z:  
552 - for filepath in z.namelist(): 552 + with zipfile.ZipFile(filepath) as zipper:
  553 + for filepath in zipper.namelist():
553 if filepath in LOCATIONS: 554 if filepath in LOCATIONS:
554 - data = z.read(filepath) 555 + data = zipper.read(filepath)
555 fields = process_xml(data) 556 fields = process_xml(data)
556 if len(fields) > 0: 557 if len(fields) > 0:
557 all_fields.extend(fields) 558 all_fields.extend(fields)
@@ -590,16 +591,17 @@ def process_xml(data): @@ -590,16 +591,17 @@ def process_xml(data):
590 591
591 for subs in root.iter(TAG_W_P): 592 for subs in root.iter(TAG_W_P):
592 elem = None 593 elem = None
593 - for e in subs:  
594 - if e.tag == TAG_W_R:  
595 - for child in e:  
596 - if child.tag == TAG_W_FLDCHAR or child.tag == TAG_W_INSTRTEXT: 594 + for curr_elem in subs:
597 # check if w:r; parse children to pull out first FLDCHAR/INSTRTEXT 595 # check if w:r; parse children to pull out first FLDCHAR/INSTRTEXT
  596 + if curr_elem.tag == TAG_W_R:
  597 + for child in curr_elem:
  598 + if child.tag == TAG_W_FLDCHAR or \
  599 + child.tag == TAG_W_INSTRTEXT:
598 elem = child 600 elem = child
599 break 601 break
600 else: 602 else:
601 - elem = e  
602 - #this should be an error condition 603 + elem = curr_elem
  604 + # this should be an error condition
603 if elem is None: 605 if elem is None:
604 continue 606 continue
605 607
@@ -633,12 +635,12 @@ def unquote(field): @@ -633,12 +635,12 @@ def unquote(field):
633 # split into components 635 # split into components
634 parts = field.strip().split(" ") 636 parts = field.strip().split(" ")
635 ddestr = "" 637 ddestr = ""
636 - for p in parts[1:]:  
637 - try:  
638 - ch = chr(int(p)) 638 + for part in parts[1:]:
  639 + try:
  640 + character = chr(int(part))
639 except ValueError: 641 except ValueError:
640 - ch = p  
641 - ddestr += ch 642 + character = part
  643 + ddestr += character
642 return ddestr 644 return ddestr
643 645
644 646