diff --git a/oletools/common/io_encoding.py b/oletools/common/io_encoding.py new file mode 100644 index 0000000..b32d82d --- /dev/null +++ b/oletools/common/io_encoding.py @@ -0,0 +1,197 @@ +#!/usr/bin/env python3 + +""" +Tool to help with input/output encoding + +Helpers to run smoothly in unicode-unfriendly environments like output redirect +or unusual language settings. + +In such settings, output to console falls back to ASCII-only. Also open() +suddenly fails to interpret non-ASCII characters. + +Therefore, scripts should call :py:meth:`ensure_stdout_handles_unicode` at +startup and use :py:meth:`uopen` instead of :py:meth:`open` for text files. + +Part of the python-oletools package: +http://www.decalage.info/python/oletools +""" + +# === LICENSE ================================================================= + +# msodde is copyright (c) 2017-2018 Philippe Lagadec (http://www.decalage.info) +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. + +# ----------------------------------------------------------------------------- +# CHANGELOG: +# 2018-11-04 v0.54 CH: - first version: ensure_stdout_handles_unicode, uopen + +# -- IMPORTS ------------------------------------------------------------------ +from __future__ import print_function +import sys +import codecs +import os +from locale import getpreferredencoding + +PY3 = sys.version_info.major >= 3 + +if PY3: + from builtins import open as builtin_open +else: + from __builtin__ import open as builtin_open + +# -- CONSTANTS ---------------------------------------------------------------- +#: encoding to use for redirection if no good encoding can be found +FALLBACK_ENCODING_REDIRECT = 'utf8' + +#: encoding for reading text from files if preferred encoding is non-unicode +FALLBACK_ENCODING_OPEN = 'utf8' + +#: print (pure-ascii) debug output to stdout +DEBUG = False + +# the encoding specified in system environment +try: + PREFERRED_ENCODING = getpreferredencoding() +except Exception as exc: + if DEBUG: + print('Exception getting preferred encoding: {}'.format(exc)) + PREFERRED_ENCODING = None + + +# -- HELPERS =----------------------------------------------------------------- + + +def ensure_stdout_handles_unicode(): + """ + Ensure that print()ing unicode does not lead to errors. + + When print()ing unicode, python relies on the environment (e.g. 
in linux on + the setting of the LANG environment variable) to tell it how to encode + unicode. That works nicely for modern-day shells where encoding is usually + UTF-8. But as soon as LANG is unset or just "C", or output is redirected or + piped, the encoding falls back to 'ASCII', which cannot handle unicode + characters. + + Based on solutions suggested on stackoverflow (c.f. + https://stackoverflow.com/q/27347772/4405656 ), wrap stdout in an encoder + that solves that problem. + + Unfortunately, stderr cannot be handled the same way ( see e.g. https:// + pythonhosted.org/kitchen/unicode-frustrations.html#frustration-5-exceptions + ), so we still have to hope there is only ascii in error messages + """ + # do not re-wrap + if isinstance(sys.stdout, codecs.StreamWriter): + if DEBUG: + print('sys.stdout wrapped already') + return + + # get output stream object + if PY3: + output_stream = sys.stdout.buffer + else: + output_stream = sys.stdout + + # determine encoding of sys.stdout + try: + encoding = sys.stdout.encoding + except AttributeError: # variable "encoding" might not exist + encoding = None + if DEBUG: + print('sys.stdout encoding is {}'.format(encoding)) + + if isinstance(encoding, str) and encoding.lower().startswith('utf'): + if DEBUG: + print('encoding is acceptable') + return # everything alright, we are working in a good environment + elif os.isatty(output_stream.fileno()): # e.g. C locale + # Do not output UTF8 since that might be mis-interpreted. + # Just replace chars that cannot be handled (ascii if encoding unknown) + print('Encoding for stdout is only {}, will replace other chars to ' + 'avoid unicode error'.format(encoding), file=sys.stderr) + sys.stdout = codecs.getwriter(encoding or 'ascii')(output_stream, errors='replace') + else: # e.g. 
redirection, pipe in python2 + new_encoding = PREFERRED_ENCODING + if DEBUG: + print('not a tty, try preferred encoding {}'.format(new_encoding)) + if not isinstance(new_encoding, str) \ + or not new_encoding.lower().startswith('utf'): + new_encoding = FALLBACK_ENCODING_REDIRECT + if DEBUG: + print('preferred encoding also unacceptable, fall back to {}' + .format(new_encoding)) + print('Encoding for stdout is only {}, will auto-encode text with {} ' + 'before output'.format(encoding, new_encoding), file=sys.stderr) + sys.stdout = codecs.getwriter(new_encoding)(output_stream) + + +def uopen(filename, mode='r', *args, **kwargs): + """ + Replacement for builtin open() that reads unicode even in ASCII environment + + In order to read unicode from text, python uses locale.getpreferredencoding + to translate bytes to str. If the environment only provides ASCII encoding, + this will fail since most office files contain unicode. + + Therefore, guess a good encoding here if necessary and open file with that. 
+ + :returns: same type as the builtin :py:func:`open` + """ + # do not interfere if not necessary: + if 'b' in mode: + if DEBUG: + print('Opening binary file, do not interfere') + return builtin_open(filename, mode, *args, **kwargs) + if 'encoding' in kwargs: + if DEBUG: + print('Opening file with encoding {!r}, do not interfere' + .format(kwargs['encoding'])) + return builtin_open(filename, mode, *args, **kwargs) + if len(args) > 1: # "encoding" is 4th arg of open(), so args[1] here + if DEBUG: + print('Opening file with encoding {!r}, do not interfere' + .format(args[1])) + return builtin_open(filename, mode, *args, **kwargs) + + # determine preferred encoding + encoding = PREFERRED_ENCODING + if DEBUG: + print('preferred encoding is {}'.format(encoding)) + + if isinstance(encoding, str) and encoding.lower().startswith('utf'): + if DEBUG: + print('encoding is acceptable, open {} regularly'.format(filename)) + return builtin_open(filename, mode, *args, **kwargs) + + # so we want to read text from a file but can probably only deal with ASCII + # --> use fallback + if DEBUG: + print('Opening {} with fallback encoding {}' + .format(filename, FALLBACK_ENCODING_OPEN)) + if PY3: + return builtin_open(filename, mode, *args, + encoding=FALLBACK_ENCODING_OPEN, **kwargs) + else: + handle = builtin_open(filename, mode, *args, **kwargs) + return codecs.EncodedFile(handle, FALLBACK_ENCODING_OPEN) diff --git a/oletools/common/log_helper/log_helper.py b/oletools/common/log_helper/log_helper.py index 7a7fb02..4144d61 100644 --- a/oletools/common/log_helper/log_helper.py +++ b/oletools/common/log_helper/log_helper.py @@ -44,6 +44,7 @@ General logging helpers from ._json_formatter import JsonFormatter from ._logger_adapter import OletoolsLoggerAdapter from . import _root_logger_wrapper +from ..io_encoding import ensure_stdout_handles_unicode import logging import sys @@ -92,6 +93,9 @@ class LogHelper: if self._is_enabled: raise ValueError('re-enabling logging. 
Not sure whether that is ok...') + if stream in (None, sys.stdout): + ensure_stdout_handles_unicode() + log_level = LOG_LEVELS[level] logging.basicConfig(level=log_level, format=log_format, stream=stream) self._is_enabled = True diff --git a/oletools/msodde.py b/oletools/msodde.py index 50e4802..8707565 100644 --- a/oletools/msodde.py +++ b/oletools/msodde.py @@ -74,6 +74,7 @@ from oletools import xls_parser from oletools import rtfobj from oletools.ppt_record_parser import is_ppt from oletools import crypto +from oletools.common.io_encoding import ensure_stdout_handles_unicode from oletools.common.log_helper import log_helper # ----------------------------------------------------------------------------- @@ -236,57 +237,6 @@ DEFAULT_LOG_LEVEL = "warning" # Default log level logger = log_helper.get_or_create_silent_logger('msodde') -# === UNICODE IN PY2 ========================================================= - -def ensure_stdout_handles_unicode(): - """ Ensure stdout can handle unicode by wrapping it if necessary - - Required e.g. if output of this script is piped or redirected in a linux - shell, since then sys.stdout.encoding is ascii and cannot handle - print(unicode). In that case we need to find some compatible encoding and - wrap sys.stdout into a encoder following (many thanks!) 
- https://stackoverflow.com/a/1819009 or https://stackoverflow.com/a/20447935 - - Can be undone by setting sys.stdout = sys.__stdout__ - """ - import codecs - import locale - - # do not re-wrap - if isinstance(sys.stdout, codecs.StreamWriter): - return - - # try to find encoding for sys.stdout - encoding = None - try: - encoding = sys.stdout.encoding - except AttributeError: # variable "encoding" might not exist - pass - - if encoding not in (None, '', 'ascii'): - return # no need to wrap - - # try to find an encoding that can handle unicode - try: - encoding = locale.getpreferredencoding() - except Exception: - pass - - # fallback if still no encoding available - if encoding in (None, '', 'ascii'): - encoding = 'utf8' - - # logging is probably not initialized yet, but just in case - logger.debug('wrapping sys.stdout with encoder using {0}'.format(encoding)) - - wrapper = codecs.getwriter(encoding) - sys.stdout = wrapper(sys.stdout) - - -if sys.version_info.major < 3: - ensure_stdout_handles_unicode() # e.g. for print(text) in main() - - # === ARGUMENT PARSING ======================================================= class ArgParserWithBanner(argparse.ArgumentParser): @@ -820,10 +770,15 @@ def process_csv(filepath): chars the same way that excel does. Tested to some extend in unittests. This can only find DDE-links, no other "suspicious" constructs (yet). - """ + Cannot deal with unicode files yet (need more than just use uopen()). 
+ """ results = [] - with open(filepath, 'r') as file_handle: + if sys.version_info.major <= 2: + open_arg = dict(mode='rb') + else: + open_arg = dict(newline='') + with open(filepath, **open_arg) as file_handle: results, dialect = process_csv_dialect(file_handle, CSV_DELIMITERS) is_small = file_handle.tell() < CSV_SMALL_THRESH @@ -854,7 +809,6 @@ def process_csv(filepath): def process_csv_dialect(file_handle, delimiters): """ helper for process_csv: process with a specific csv dialect """ - # determine dialect = delimiter chars, quote chars, ... dialect = csv.Sniffer().sniff(file_handle.read(CSV_SMALL_THRESH), delimiters=delimiters) diff --git a/oletools/olemeta.py b/oletools/olemeta.py index 2c0badd..6131746 100644 --- a/oletools/olemeta.py +++ b/oletools/olemeta.py @@ -79,6 +79,7 @@ if not _parent_dir in sys.path: import olefile from oletools.thirdparty import xglob from oletools.thirdparty.tablestream import tablestream +from oletools.common.io_encoding import ensure_stdout_handles_unicode #=== MAIN ================================================================= @@ -88,13 +89,12 @@ def process_ole(ole): meta = ole.get_metadata() # console output with UTF8 encoding: - # It looks like we do not need the UTF8 codec anymore, both for Python 2 and 3 - console_utf8 = sys.stdout #codecs.getwriter('utf8')(sys.stdout) + ensure_stdout_handles_unicode() # TODO: move similar code to a function print('Properties from the SummaryInformation stream:') - t = tablestream.TableStream([21, 30], header_row=['Property', 'Value'], outfile=console_utf8) + t = tablestream.TableStream([21, 30], header_row=['Property', 'Value']) for prop in meta.SUMMARY_ATTRIBS: value = getattr(meta, prop) if value is not None: @@ -111,7 +111,7 @@ def process_ole(ole): print('') print('Properties from the DocumentSummaryInformation stream:') - t = tablestream.TableStream([21, 30], header_row=['Property', 'Value'], outfile=console_utf8) + t = tablestream.TableStream([21, 30], header_row=['Property', 
'Value']) for prop in meta.DOCSUM_ATTRIBS: value = getattr(meta, prop) if value is not None: diff --git a/oletools/oleobj.py b/oletools/oleobj.py index d9cf876..4fd15da 100644 --- a/oletools/oleobj.py +++ b/oletools/oleobj.py @@ -73,6 +73,7 @@ except ImportError: from oletools.ppt_record_parser import (is_ppt, PptFile, PptRecordExOleVbaActiveXAtom) from oletools.ooxml import XmlParser +from oletools.common.io_encoding import ensure_stdout_handles_unicode # ----------------------------------------------------------------------------- # CHANGELOG: @@ -848,6 +849,7 @@ def main(cmd_line_args=None): provide other arguments. """ # print banner with version + ensure_stdout_handles_unicode() print('oleobj %s - http://decalage.info/oletools' % __version__) print('THIS IS WORK IN PROGRESS - Check updates regularly!') print('Please report any issue at ' diff --git a/oletools/olevba.py b/oletools/olevba.py index c088891..9e0ed8d 100644 --- a/oletools/olevba.py +++ b/oletools/olevba.py @@ -318,6 +318,7 @@ from oletools import ppt_parser from oletools import oleform from oletools import rtfobj from oletools import crypto +from oletools.common.io_encoding import ensure_stdout_handles_unicode from oletools.common import codepages # monkeypatch email to fix issue #32: @@ -4122,7 +4123,6 @@ def main(cmd_line_args=None): in process_args. Per default (cmd_line_args=None), sys.argv is used. Option mainly added for unit-testing """ - options, args = parse_args(cmd_line_args) # provide info about tool and its version diff --git a/oletools/ooxml.py b/oletools/ooxml.py index a36c99d..2d5effa 100644 --- a/oletools/ooxml.py +++ b/oletools/ooxml.py @@ -13,14 +13,47 @@ TODO: check what is duplicate here with oleid, maybe merge some day? TODO: "xml2003" == "flatopc"? .. 
codeauthor:: Intra2net AG +License: BSD, see source code or documentation + +msodde is part of the python-oletools package: +http://www.decalage.info/python/oletools """ +# === LICENSE ================================================================= + +# msodde is copyright (c) 2017-2019 Philippe Lagadec (http://www.decalage.info) +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. 
+ +# -- IMPORTS ------------------------------------------------------------------ + import sys +from oletools.common.log_helper import log_helper +from oletools.common.io_encoding import uopen from zipfile import ZipFile, BadZipfile, is_zipfile from os.path import splitext import io import re -from oletools.common.log_helper import log_helper # import lxml or ElementTree for XML parsing: try: @@ -29,6 +62,18 @@ try: except ImportError: import xml.etree.cElementTree as ET +# ----------------------------------------------------------------------------- +# CHANGELOG: +# 2018-12-06 CH: - ensure stdout can handle unicode + +__version__ = '0.54.2' + + +############################################################################### +# CONSTANTS +############################################################################### + + logger = log_helper.get_or_create_silent_logger('ooxml') #: subfiles that have to be part of every ooxml file @@ -127,7 +172,7 @@ def get_type(filename): parser = XmlParser(filename) if parser.is_single_xml(): match = None - with open(filename, 'r') as handle: + with uopen(filename, 'r') as handle: match = re.search(OFFICE_XML_PROGID_REGEX, handle.read(1024)) if not match: return DOCTYPE_NONE @@ -416,7 +461,7 @@ class XmlParser(object): # find prog id in xml prolog match = None - with open(self.filename, 'r') as handle: + with uopen(self.filename, 'r') as handle: match = re.search(OFFICE_XML_PROGID_REGEX, handle.read(1024)) if match: self._is_single_xml = True @@ -424,11 +469,18 @@ class XmlParser(object): raise BadOOXML(self.filename, 'is no zip and has no prog_id') def iter_files(self, args=None): - """ Find files in zip or just give single xml file """ + """ + Find files in zip or just give single xml file + + yields pairs (subfile-name, file-handle) where file-handle is an open + file-like object. (Do not care too much about encoding here, the xml + parser reads the encoding from the first lines in the file.) 
+ """ if self.is_single_xml(): if args: raise BadOOXML(self.filename, 'xml has no subfiles') - with open(self.filename, 'r') as handle: + # do not use uopen, xml parser determines encoding on its own + with open(self.filename, 'rb') as handle: yield None, handle # the subfile=None is needed in iter_xml self.did_iter_all = True else: @@ -638,9 +690,10 @@ class XmlParser(object): def test(): - """ Main function, called when running file as script + """ + Test xml parsing; called when running this file as a script. - see module doc for more info + Prints every element found in input file (to be given as command line arg). """ log_helper.enable_logging(False, 'debug') if len(sys.argv) != 2: diff --git a/tests/common/test_encoding_handler.py b/tests/common/test_encoding_handler.py new file mode 100644 index 0000000..b8bdfc6 --- /dev/null +++ b/tests/common/test_encoding_handler.py @@ -0,0 +1,206 @@ +"""Test common.ensure_stdout_handles_unicode""" + +from __future__ import print_function + +import unittest +import sys +from subprocess import check_call, CalledProcessError +from tempfile import mkstemp +import os +from os.path import isfile +from contextlib import contextmanager + +FILE_TEXT = u'The unicode check mark is \u2713.\n' + +@contextmanager +def temp_file(just_name=True): + """Context manager that creates temp file and deletes it in the end""" + tmp_descriptor = None + tmp_name = None + tmp_handle = None + try: + tmp_descriptor, tmp_name = mkstemp() + + # we create our own file handle since we want to be able to close the + # file and open it again for reading. 
+ # We keep the os-level descriptor open so file name is still reserved + # for us + if just_name: + yield tmp_name + else: + tmp_handle = open(tmp_name, 'wb') + yield tmp_handle, tmp_name + except Exception: + raise + finally: + if tmp_descriptor is not None: + os.close(tmp_descriptor) + if tmp_handle is not None: + tmp_handle.close() + if tmp_name is not None and isfile(tmp_name): + os.unlink(tmp_name) + + +class TestEncodingHandler(unittest.TestCase): + """Tests replacing stdout encoding in various scenarios""" + + def test_print(self): + """Test regular unicode output not raise error""" + check_call('{python} {this_file} print'.format(python=sys.executable, + this_file=__file__), + shell=True) + + def test_print_redirect(self): + """ + Test redirection of unicode output to files does not raise error + + TODO: test this on non-linux OSs + """ + with temp_file() as tmp_file: + check_call('{python} {this_file} print > {tmp_file}' + .format(python=sys.executable, this_file=__file__, + tmp_file=tmp_file), + shell=True) + + @unittest.skipIf(not sys.platform.startswith('linux'), + 'Only tested on linux sofar') + def test_print_no_lang(self): + """ + Test redirection of unicode output to files does not raise error + + TODO: Adapt this for other OSs; for win create batch script + """ + check_call('LANG=C {python} {this_file} print' + .format(python=sys.executable, this_file=__file__), + shell=True) + + def test_uopen(self): + """Test that uopen in a nice environment is ok""" + with temp_file(False) as (tmp_handle, tmp_file): + tmp_handle.write(FILE_TEXT.encode('utf8')) + tmp_handle.close() + + try: + check_call('{python} {this_file} read {tmp_file}' + .format(python=sys.executable, this_file=__file__, + tmp_file=tmp_file), + shell=True) + except CalledProcessError as cpe: + self.fail(cpe.output) + + def test_uopen_redirect(self): + """ + Test redirection of unicode output to files does not raise error + + TODO: test this on non-linux OSs + """ + with temp_file(False) as 
(tmp_handle, tmp_file): + tmp_handle.write(FILE_TEXT.encode('utf8')) + tmp_handle.close() + + with temp_file() as redirect_file: + try: + check_call( + '{python} {this_file} read {tmp_file} >{redirect_file}' + .format(python=sys.executable, this_file=__file__, + tmp_file=tmp_file, redirect_file=redirect_file), + shell=True) + except CalledProcessError as cpe: + self.fail(cpe.output) + + @unittest.skipIf(not sys.platform.startswith('linux'), + 'Only tested on linux sofar') + def test_uopen_no_lang(self): + """ + Test that uopen in a C-LANG environment is ok + + TODO: Adapt this for other OSs; for win create batch script + """ + with temp_file(False) as (tmp_handle, tmp_file): + tmp_handle.write(FILE_TEXT.encode('utf8')) + tmp_handle.close() + + try: + check_call('LANG=C {python} {this_file} read {tmp_file}' + .format(python=sys.executable, this_file=__file__, + tmp_file=tmp_file), + shell=True) + except CalledProcessError as cpe: + self.fail(cpe.output) + + +def run_read(filename): + """This is called from test_uopen* tests as script. 
Reads text, compares""" + from oletools.common.io_encoding import uopen + # open file + with uopen(filename, 'rt') as reader: + # a few tests + if reader.closed: + raise ValueError('handle is closed!') + if reader.name != filename: + raise ValueError('Wrong filename {}'.format(reader.name)) + if reader.isatty(): + raise ValueError('Reader is a tty!') + if reader.tell() != 0: + raise ValueError('Reader.tell is not 0 at beginning') + + # read text + text = reader.read() + + # a few more tests + if not reader.closed: + raise ValueError('Reader is not closed outside context') + if reader.name != filename: + raise ValueError('Wrong filename {} after context'.format(reader.name)) + # no isatty() check here: calling it on the closed reader itself raises + # ValueError('I/O operation on closed file') + + # compare text + if sys.version_info.major <= 2: # in python2 get encoded byte string + expect = FILE_TEXT.encode('utf8') + else: # python3: should get real unicode + expect = FILE_TEXT + if text != expect: + raise ValueError('Wrong contents: {!r} != {!r}' + .format(text, expect)) + return 0 + + +def run_print(): + """This is called from test_print* tests as script. 
Prints & logs unicode""" + from oletools.common.io_encoding import ensure_stdout_handles_unicode + from oletools.common.log_helper import log_helper + ensure_stdout_handles_unicode() + print(u'Check: \u2713') # print check mark + + # check logging as well + logger = log_helper.get_or_create_silent_logger('test_encoding_handler') + log_helper.enable_logging(False, 'debug', stream=sys.stdout) + logger.info(u'Check: \u2713') + return 0 + + +# tests call this file as script +if __name__ == '__main__': + if len(sys.argv) < 2: + sys.exit(unittest.main()) + + # hack required to import common from parent dir, not system-wide one + # (usually unittest seems to do that for us) + from os.path import abspath, dirname, join + ole_base = dirname(dirname(dirname(abspath(__file__)))) + sys.path.insert(0, ole_base) + + if sys.argv[1] == 'print': + if len(sys.argv) > 2: + print('Expect no arg for "print"', file=sys.stderr) + sys.exit(2) + sys.exit(run_print()) + elif sys.argv[1] == 'read': + if len(sys.argv) != 3: + print('Expect single arg for "read"', file=sys.stderr) + sys.exit(2) + sys.exit(run_read(sys.argv[2])) + else: + print('Unexpected argument: {}'.format(sys.argv[1]), file=sys.stderr) + sys.exit(2) diff --git a/tests/oleobj/test_basic.py b/tests/oleobj/test_basic.py index 8ad0ef5..f2c2a8f 100644 --- a/tests/oleobj/test_basic.py +++ b/tests/oleobj/test_basic.py @@ -10,6 +10,7 @@ from glob import glob # Directory with test data, independent of current working directory from tests.test_utils import DATA_BASE_DIR, call_and_capture from oletools import oleobj +from oletools.common.io_encoding import ensure_stdout_handles_unicode #: provide some more info to find errors @@ -61,6 +62,7 @@ def calc_md5(filename): def preread_file(args): """helper for TestOleObj.test_non_streamed: preread + call process_file""" + ensure_stdout_handles_unicode() # usually, main() call this ignore_arg, output_dir, filename = args if ignore_arg != '-d': raise ValueError('ignore_arg not as expected!')