Commit 26a592fb1b983ff7dbf599f01ffc784f4223d18a
1 parent: 6028d9ab
msodde: clean-up code following pep8 and pylint (2)
- move imports further up
- simplify code for oletools import hack
- make a few variable names longer
Showing 1 changed file with 47 additions and 45 deletions
oletools/msodde.py
| @@ -43,9 +43,40 @@ http://www.decalage.info/python/oletools | @@ -43,9 +43,40 @@ http://www.decalage.info/python/oletools | ||
| 43 | # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | 43 | # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
| 44 | # POSSIBILITY OF SUCH DAMAGE. | 44 | # POSSIBILITY OF SUCH DAMAGE. |
| 45 | 45 | ||
| 46 | +# -- IMPORTS ------------------------------------------------------------------ | ||
| 47 | + | ||
| 46 | from __future__ import print_function | 48 | from __future__ import print_function |
| 47 | 49 | ||
| 48 | -#------------------------------------------------------------------------------ | 50 | +import argparse |
| 51 | +import zipfile | ||
| 52 | +import os | ||
| 53 | +from os.path import abspath, dirname | ||
| 54 | +import sys | ||
| 55 | +import json | ||
| 56 | +import logging | ||
| 57 | +import re | ||
| 58 | +import csv | ||
| 59 | + | ||
| 60 | +# import lxml or ElementTree for XML parsing: | ||
| 61 | +try: | ||
| 62 | + # lxml: best performance for XML processing | ||
| 63 | + import lxml.etree as ET | ||
| 64 | +except ImportError: | ||
| 65 | + import xml.etree.cElementTree as ET | ||
| 66 | + | ||
| 67 | +# little hack to allow absolute imports even if oletools is not installed | ||
| 68 | +# Copied from olevba.py | ||
| 69 | +PARENT_DIR = dirname(dirname(abspath(__file__))) | ||
| 70 | +if PARENT_DIR not in sys.path: | ||
| 71 | + sys.path.insert(0, PARENT_DIR) | ||
| 72 | +del PARENT_DIR | ||
| 73 | + | ||
| 74 | +from oletools.thirdparty import olefile | ||
| 75 | +from oletools import ooxml | ||
| 76 | +from oletools import xls_parser | ||
| 77 | +from oletools import rtfobj | ||
| 78 | + | ||
| 79 | +# ----------------------------------------------------------------------------- | ||
| 49 | # CHANGELOG: | 80 | # CHANGELOG: |
| 50 | # 2017-10-18 v0.52 PL: - first version | 81 | # 2017-10-18 v0.52 PL: - first version |
| 51 | # 2017-10-20 PL: - fixed issue #202 (handling empty xml tags) | 82 | # 2017-10-20 PL: - fixed issue #202 (handling empty xml tags) |
| @@ -77,36 +108,6 @@ __version__ = '0.52dev9' | @@ -77,36 +108,6 @@ __version__ = '0.52dev9' | ||
| 77 | # REFERENCES: | 108 | # REFERENCES: |
| 78 | 109 | ||
| 79 | 110 | ||
| 80 | -#--- IMPORTS ------------------------------------------------------------------ | ||
| 81 | - | ||
| 82 | -import argparse | ||
| 83 | -import zipfile | ||
| 84 | -import os | ||
| 85 | -import sys | ||
| 86 | -import json | ||
| 87 | -import logging | ||
| 88 | -import re | ||
| 89 | -from struct import unpack | ||
| 90 | - | ||
| 91 | -# import lxml or ElementTree for XML parsing: | ||
| 92 | -try: | ||
| 93 | - # lxml: best performance for XML processing | ||
| 94 | - import lxml.etree as ET | ||
| 95 | -except ImportError: | ||
| 96 | - import xml.etree.cElementTree as ET | ||
| 97 | - | ||
| 98 | -# little hack to allow absolute imports even if oletools is not installed | ||
| 99 | -# Copied from olevba.py | ||
| 100 | -_thismodule_dir = os.path.normpath(os.path.abspath(os.path.dirname(__file__))) | ||
| 101 | -_parent_dir = os.path.normpath(os.path.join(_thismodule_dir, '..')) | ||
| 102 | -if not _parent_dir in sys.path: | ||
| 103 | - sys.path.insert(0, _parent_dir) | ||
| 104 | - | ||
| 105 | -from oletools.thirdparty import olefile | ||
| 106 | -from oletools import ooxml | ||
| 107 | -from oletools import xls_parser | ||
| 108 | -from oletools import rtfobj | ||
| 109 | - | ||
| 110 | # === PYTHON 2+3 SUPPORT ====================================================== | 111 | # === PYTHON 2+3 SUPPORT ====================================================== |
| 111 | 112 | ||
| 112 | if sys.version_info[0] >= 3: | 113 | if sys.version_info[0] >= 3: |
| @@ -548,10 +549,10 @@ def process_docx(filepath, field_filter_mode=None): | @@ -548,10 +549,10 @@ def process_docx(filepath, field_filter_mode=None): | ||
| 548 | """ find dde-links (and other fields) in Word 2007+ files """ | 549 | """ find dde-links (and other fields) in Word 2007+ files """ |
| 549 | log.debug('process_docx') | 550 | log.debug('process_docx') |
| 550 | all_fields = [] | 551 | all_fields = [] |
| 551 | - with zipfile.ZipFile(filepath) as z: | ||
| 552 | - for filepath in z.namelist(): | 552 | + with zipfile.ZipFile(filepath) as zipper: |
| 553 | + for filepath in zipper.namelist(): | ||
| 553 | if filepath in LOCATIONS: | 554 | if filepath in LOCATIONS: |
| 554 | - data = z.read(filepath) | 555 | + data = zipper.read(filepath) |
| 555 | fields = process_xml(data) | 556 | fields = process_xml(data) |
| 556 | if len(fields) > 0: | 557 | if len(fields) > 0: |
| 557 | all_fields.extend(fields) | 558 | all_fields.extend(fields) |
| @@ -590,16 +591,17 @@ def process_xml(data): | @@ -590,16 +591,17 @@ def process_xml(data): | ||
| 590 | 591 | ||
| 591 | for subs in root.iter(TAG_W_P): | 592 | for subs in root.iter(TAG_W_P): |
| 592 | elem = None | 593 | elem = None |
| 593 | - for e in subs: | ||
| 594 | - if e.tag == TAG_W_R: | ||
| 595 | - for child in e: | ||
| 596 | - if child.tag == TAG_W_FLDCHAR or child.tag == TAG_W_INSTRTEXT: | 594 | + for curr_elem in subs: |
| 597 | # check if w:r; parse children to pull out first FLDCHAR/INSTRTEXT | 595 | # check if w:r; parse children to pull out first FLDCHAR/INSTRTEXT |
| 596 | + if curr_elem.tag == TAG_W_R: | ||
| 597 | + for child in curr_elem: | ||
| 598 | + if child.tag == TAG_W_FLDCHAR or \ | ||
| 599 | + child.tag == TAG_W_INSTRTEXT: | ||
| 598 | elem = child | 600 | elem = child |
| 599 | break | 601 | break |
| 600 | else: | 602 | else: |
| 601 | - elem = e | ||
| 602 | - #this should be an error condition | 603 | + elem = curr_elem |
| 604 | + # this should be an error condition | ||
| 603 | if elem is None: | 605 | if elem is None: |
| 604 | continue | 606 | continue |
| 605 | 607 | ||
| @@ -633,12 +635,12 @@ def unquote(field): | @@ -633,12 +635,12 @@ def unquote(field): | ||
| 633 | # split into components | 635 | # split into components |
| 634 | parts = field.strip().split(" ") | 636 | parts = field.strip().split(" ") |
| 635 | ddestr = "" | 637 | ddestr = "" |
| 636 | - for p in parts[1:]: | ||
| 637 | - try: | ||
| 638 | - ch = chr(int(p)) | 638 | + for part in parts[1:]: |
| 639 | + try: | ||
| 640 | + character = chr(int(part)) | ||
| 639 | except ValueError: | 641 | except ValueError: |
| 640 | - ch = p | ||
| 641 | - ddestr += ch | 642 | + character = part |
| 643 | + ddestr += character | ||
| 642 | return ddestr | 644 | return ddestr |
| 643 | 645 | ||
| 644 | 646 |