Commit 26a592fb1b983ff7dbf599f01ffc784f4223d18a
1 parent
6028d9ab
msodde: clean-up code following pep8 and pylint (2)
- move imports further up
- simplify code for oletools import hack
- make a few variable names longer
Showing 1 changed file with 47 additions and 45 deletions
oletools/msodde.py
| ... | ... | @@ -43,9 +43,40 @@ http://www.decalage.info/python/oletools |
| 43 | 43 | # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
| 44 | 44 | # POSSIBILITY OF SUCH DAMAGE. |
| 45 | 45 | |
| 46 | +# -- IMPORTS ------------------------------------------------------------------ | |
| 47 | + | |
| 46 | 48 | from __future__ import print_function |
| 47 | 49 | |
| 48 | -#------------------------------------------------------------------------------ | |
| 50 | +import argparse | |
| 51 | +import zipfile | |
| 52 | +import os | |
| 53 | +from os.path import abspath, dirname | |
| 54 | +import sys | |
| 55 | +import json | |
| 56 | +import logging | |
| 57 | +import re | |
| 58 | +import csv | |
| 59 | + | |
| 60 | +# import lxml or ElementTree for XML parsing: | |
| 61 | +try: | |
| 62 | + # lxml: best performance for XML processing | |
| 63 | + import lxml.etree as ET | |
| 64 | +except ImportError: | |
| 65 | + import xml.etree.cElementTree as ET | |
| 66 | + | |
| 67 | +# little hack to allow absolute imports even if oletools is not installed | |
| 68 | +# Copied from olevba.py | |
| 69 | +PARENT_DIR = dirname(dirname(abspath(__file__))) | |
| 70 | +if PARENT_DIR not in sys.path: | |
| 71 | + sys.path.insert(0, PARENT_DIR) | |
| 72 | +del PARENT_DIR | |
| 73 | + | |
| 74 | +from oletools.thirdparty import olefile | |
| 75 | +from oletools import ooxml | |
| 76 | +from oletools import xls_parser | |
| 77 | +from oletools import rtfobj | |
| 78 | + | |
| 79 | +# ----------------------------------------------------------------------------- | |
| 49 | 80 | # CHANGELOG: |
| 50 | 81 | # 2017-10-18 v0.52 PL: - first version |
| 51 | 82 | # 2017-10-20 PL: - fixed issue #202 (handling empty xml tags) |
| ... | ... | @@ -77,36 +108,6 @@ __version__ = '0.52dev9' |
| 77 | 108 | # REFERENCES: |
| 78 | 109 | |
| 79 | 110 | |
| 80 | -#--- IMPORTS ------------------------------------------------------------------ | |
| 81 | - | |
| 82 | -import argparse | |
| 83 | -import zipfile | |
| 84 | -import os | |
| 85 | -import sys | |
| 86 | -import json | |
| 87 | -import logging | |
| 88 | -import re | |
| 89 | -from struct import unpack | |
| 90 | - | |
| 91 | -# import lxml or ElementTree for XML parsing: | |
| 92 | -try: | |
| 93 | - # lxml: best performance for XML processing | |
| 94 | - import lxml.etree as ET | |
| 95 | -except ImportError: | |
| 96 | - import xml.etree.cElementTree as ET | |
| 97 | - | |
| 98 | -# little hack to allow absolute imports even if oletools is not installed | |
| 99 | -# Copied from olevba.py | |
| 100 | -_thismodule_dir = os.path.normpath(os.path.abspath(os.path.dirname(__file__))) | |
| 101 | -_parent_dir = os.path.normpath(os.path.join(_thismodule_dir, '..')) | |
| 102 | -if not _parent_dir in sys.path: | |
| 103 | - sys.path.insert(0, _parent_dir) | |
| 104 | - | |
| 105 | -from oletools.thirdparty import olefile | |
| 106 | -from oletools import ooxml | |
| 107 | -from oletools import xls_parser | |
| 108 | -from oletools import rtfobj | |
| 109 | - | |
| 110 | 111 | # === PYTHON 2+3 SUPPORT ====================================================== |
| 111 | 112 | |
| 112 | 113 | if sys.version_info[0] >= 3: |
| ... | ... | @@ -548,10 +549,10 @@ def process_docx(filepath, field_filter_mode=None): |
| 548 | 549 | """ find dde-links (and other fields) in Word 2007+ files """ |
| 549 | 550 | log.debug('process_docx') |
| 550 | 551 | all_fields = [] |
| 551 | - with zipfile.ZipFile(filepath) as z: | |
| 552 | - for filepath in z.namelist(): | |
| 552 | + with zipfile.ZipFile(filepath) as zipper: | |
| 553 | + for filepath in zipper.namelist(): | |
| 553 | 554 | if filepath in LOCATIONS: |
| 554 | - data = z.read(filepath) | |
| 555 | + data = zipper.read(filepath) | |
| 555 | 556 | fields = process_xml(data) |
| 556 | 557 | if len(fields) > 0: |
| 557 | 558 | all_fields.extend(fields) |
| ... | ... | @@ -590,16 +591,17 @@ def process_xml(data): |
| 590 | 591 | |
| 591 | 592 | for subs in root.iter(TAG_W_P): |
| 592 | 593 | elem = None |
| 593 | - for e in subs: | |
| 594 | - if e.tag == TAG_W_R: | |
| 595 | - for child in e: | |
| 596 | - if child.tag == TAG_W_FLDCHAR or child.tag == TAG_W_INSTRTEXT: | |
| 594 | + for curr_elem in subs: | |
| 597 | 595 | # check if w:r; parse children to pull out first FLDCHAR/INSTRTEXT |
| 596 | + if curr_elem.tag == TAG_W_R: | |
| 597 | + for child in curr_elem: | |
| 598 | + if child.tag == TAG_W_FLDCHAR or \ | |
| 599 | + child.tag == TAG_W_INSTRTEXT: | |
| 598 | 600 | elem = child |
| 599 | 601 | break |
| 600 | 602 | else: |
| 601 | - elem = e | |
| 602 | - #this should be an error condition | |
| 603 | + elem = curr_elem | |
| 604 | + # this should be an error condition | |
| 603 | 605 | if elem is None: |
| 604 | 606 | continue |
| 605 | 607 | |
| ... | ... | @@ -633,12 +635,12 @@ def unquote(field): |
| 633 | 635 | # split into components |
| 634 | 636 | parts = field.strip().split(" ") |
| 635 | 637 | ddestr = "" |
| 636 | - for p in parts[1:]: | |
| 637 | - try: | |
| 638 | - ch = chr(int(p)) | |
| 638 | + for part in parts[1:]: | |
| 639 | + try: | |
| 640 | + character = chr(int(part)) | |
| 639 | 641 | except ValueError: |
| 640 | - ch = p | |
| 641 | - ddestr += ch | |
| 642 | + character = part | |
| 643 | + ddestr += character | |
| 642 | 644 | return ddestr |
| 643 | 645 | |
| 644 | 646 | ... | ... |