Commit 2f9505d9e4a8cb249ceb8c0815d4ee084e4e0d05

Authored by decalage2
1 parent 34e73abf

rtfobj: fixed imports for Python 2+3, fixed hex decoding bug in RtfObjParser (issue #103)

Showing 1 changed file with 21 additions and 7 deletions
oletools/rtfobj.py
... ... @@ -65,8 +65,10 @@ http://www.decalage.info/python/oletools
65 65 # 2016-08-09 PL: - fixed issue #78, improved regex
66 66 # 2016-09-06 PL: - fixed issue #83, backward compatible API
67 67 # 2016-11-17 v0.51 PL: - updated call to oleobj.OleNativeStream
  68 +# 2017-03-12 PL: - fixed imports for Python 2+3
  69 +# - fixed hex decoding bug in RtfObjParser (issue #103)
68 70  
69   -__version__ = '0.51'
  71 +__version__ = '0.51dev2'
70 72  
71 73 # ------------------------------------------------------------------------------
72 74 # TODO:
... ... @@ -83,10 +85,22 @@ __version__ = '0.51'
83 85 import re, os, sys, binascii, logging, optparse
84 86 import os.path
85 87  
86   -from .thirdparty.xglob import xglob
87   -from .thirdparty.tablestream import tablestream
88   -from .oleobj import OleObject, OleNativeStream
89   -from . import oleobj
  88 +# IMPORTANT: it should be possible to run oletools directly as scripts
  89 +# in any directory without installing them with pip or setup.py.
  90 +# In that case, relative imports are NOT usable.
  91 +# And to enable Python 2+3 compatibility, we need to use absolute imports,
  92 +# so we add the oletools parent folder to sys.path (absolute+normalized path):
  93 +_thismodule_dir = os.path.normpath(os.path.abspath(os.path.dirname(__file__)))
  94 +# print('_thismodule_dir = %r' % _thismodule_dir)
  95 +_parent_dir = os.path.normpath(os.path.join(_thismodule_dir, '..'))
  96 +# print('_parent_dir = %r' % _parent_dir)
  97 +if not _parent_dir in sys.path:
  98 + sys.path.insert(0, _parent_dir)
  99 +
  100 +from oletools.thirdparty.xglob import xglob
  101 +from oletools.thirdparty.tablestream import tablestream
  102 +from oletools.oleobj import OleObject, OleNativeStream
  103 +from oletools import oleobj
90 104  
91 105 # === LOGGING =================================================================
92 106  
... ... @@ -528,10 +542,10 @@ class RtfObjParser(RtfParser):
528 542 # Filter out all whitespaces first (just ignored):
529 543 hexdata1 = destination.data.translate(None, b' \t\r\n\f\v')
530 544 # Then filter out any other non-hex character:
531   - hexdata = re.sub(b'[^a-hA-H0-9]', b'', hexdata1)
  545 + hexdata = re.sub(b'[^a-fA-F0-9]', b'', hexdata1)
532 546 if len(hexdata) < len(hexdata1):
533 547 # this is only for debugging:
534   - nonhex = re.sub(b'[a-hA-H0-9]', b'', hexdata1)
  548 + nonhex = re.sub(b'[a-fA-F0-9]', b'', hexdata1)
535 549 log.debug('Found non-hex chars in hexdata: %r' % nonhex)
536 550 # MS Word accepts an extra hex digit, so we need to trim it if present:
537 551 if len(hexdata) & 1:
... ...